15f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian/* 25f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Copyright (C) 2009 Apple Inc. All rights reserved. 35f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 45f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Redistribution and use in source and binary forms, with or without 55f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * modification, are permitted provided that the following conditions 65f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * are met: 75f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 1. Redistributions of source code must retain the above copyright 85f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * notice, this list of conditions and the following disclaimer. 95f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 2. Redistributions in binary form must reproduce the above copyright 105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * notice, this list of conditions and the following disclaimer in the 115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * documentation and/or other materials provided with the distribution. 125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#ifndef RegexParser_h 275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#define RegexParser_h 285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <wtf/Platform.h> 305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#if ENABLE(YARR) 325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <UString.h> 345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <wtf/ASCIICType.h> 355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <wtf/unicode/Unicode.h> 365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <limits.h> 375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qiannamespace JSC { namespace Yarr { 395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qianenum BuiltInCharacterClassID { 415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian DigitClassID, 425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian SpaceClassID, 435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian WordClassID, 445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian NewlineClassID, 455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian}; 465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian// The Parser class should not be used directly - only via the Yarr::parse() method. 485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qiantemplate<class Delegate> 495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qianclass Parser { 505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qianprivate: 515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian template<class FriendDelegate> 525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit); 535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian enum ErrorCode { 555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian NoError, 565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian PatternTooLarge, 575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian QuantifierOutOfOrder, 585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian QuantifierWithoutAtom, 595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian MissingParentheses, 605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ParenthesesUnmatched, 615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ParenthesesTypeInvalid, 625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian CharacterClassUnmatched, 635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian CharacterClassOutOfOrder, 645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian EscapeUnterminated, 655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian NumberOfErrorCodes 665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian }; 675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * CharacterClassParserDelegate: 705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * The class CharacterClassParserDelegate is used in the parsing of character 725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * classes. This class handles detection of character ranges. This class 735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * implements enough of the delegate interface such that it can be passed to 745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused 755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * to perform the parsing of escape characters in character sets. 765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian class CharacterClassParserDelegate { 785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian public: 795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err) 805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian : m_delegate(delegate) 815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian , m_err(err) 825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian , m_state(empty) 835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * begin(): 885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Called at beginning of construction. 905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void begin(bool invert) 925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomCharacterClassBegin(invert); 945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atomPatternCharacterUnescaped(): 985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * This method is called directly from parseCharacterClass(), to report a new 1005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * pattern character token. This method differs from atomPatternCharacter(), 1015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * which will be called from parseEscape(), since a hypen provided via this 1025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * method may be indicating a character range, but a hyphen parsed by 1035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parseEscape() cannot be interpreted as doing so. 1045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 1055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void atomPatternCharacterUnescaped(UChar ch) 1065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 1075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian switch (m_state) { 1085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case empty: 1095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_character = ch; 1105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_state = cachedCharacter; 1115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 1125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case cachedCharacter: 1145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (ch == '-') 1155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_state = cachedCharacterHyphen; 1165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else { 1175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomCharacterClassAtom(m_character); 1185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_character = ch; 1195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 1205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 1215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case cachedCharacterHyphen: 1235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (ch >= m_character) 1245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomCharacterClassRange(m_character, ch); 1255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else 1265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = CharacterClassOutOfOrder; 1275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_state = empty; 1285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 1295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 1305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 1325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atomPatternCharacter(): 1335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 1345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Adds a pattern character, called by parseEscape(), as such will not 1355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * interpret a hyphen as indicating a character range. 1365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 1375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void atomPatternCharacter(UChar ch) 1385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 1395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // Flush if a character is already pending to prevent the 1405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // hyphen from begin interpreted as indicating a range. 1415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if((ch == '-') && (m_state == cachedCharacter)) 1425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian flush(); 1435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian atomPatternCharacterUnescaped(ch); 1455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 1465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 1485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atomBuiltInCharacterClass(): 1495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 1505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Adds a built-in character class, called by parseEscape(). 1515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 1525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert) 1535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 1545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian flush(); 1555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomCharacterClassBuiltIn(classID, invert); 1565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 1575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 1595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * end(): 1605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 1615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Called at end of construction. 1625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 1635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void end() 1645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 1655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian flush(); 1665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomCharacterClassEnd(); 1675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 1685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // parseEscape() should never call these delegate methods when 1705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // invoked with inCharacterClass set. 1715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); } 1725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); } 1735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian private: 1755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void flush() 1765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 1775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen 1785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomCharacterClassAtom(m_character); 1795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (m_state == cachedCharacterHyphen) 1805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomCharacterClassAtom('-'); 1815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_state = empty; 1825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 1835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian Delegate& m_delegate; 1855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ErrorCode& m_err; 1865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian enum CharacterClassConstructionState { 1875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian empty, 1885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian cachedCharacter, 1895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian cachedCharacterHyphen, 1905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } m_state; 1915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian UChar m_character; 1925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian }; 1935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 1945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit) 1955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian : m_delegate(delegate) 1965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian , m_backReferenceLimit(backReferenceLimit) 1975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian , m_err(NoError) 1985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian , m_data(pattern.data()) 1995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian , m_size(pattern.size()) 2005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian , m_index(0) 2015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian , m_parenthesesNestingDepth(0) 2025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 2035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 2045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 2055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 2065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parseEscape(): 2075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 2085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Helper for parseTokens() AND parseCharacterClass(). 2095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Unlike the other parser methods, this function does not report tokens 2105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * directly to the member delegate (m_delegate), instead tokens are 2115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * emitted to the delegate provided as an argument. In the case of atom 2125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * escapes, parseTokens() will call parseEscape() passing m_delegate as 2135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * an argument, and as such the escape will be reported to the delegate. 2145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 2155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * However this method may also be used by parseCharacterClass(), in which 2165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * case a CharacterClassParserDelegate will be passed as the delegate that 2175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * tokens should be added to. A boolean flag is also provided to indicate 2185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * whether that an escape in a CharacterClass is being parsed (some parsing 2195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * rules change in this context). 2205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 2215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * The boolean value returned by this method indicates whether the token 2225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parsed was an atom (outside of a characted class \b and \B will be 2235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * interpreted as assertions). 2245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 2255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian template<bool inCharacterClass, class EscapeDelegate> 2265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian bool parseEscape(EscapeDelegate& delegate) 2275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 2285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(!m_err); 2295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(peek() == '\\'); 2305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 2315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 2325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (atEndOfPattern()) { 2335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = EscapeUnterminated; 2345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return false; 2355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 2365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 2375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian switch (peek()) { 2385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // Assertions 2395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'b': 2405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 2415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (inCharacterClass) 2425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('\b'); 2435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else { 2445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.assertionWordBoundary(false); 2455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return false; 2465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 2475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 2485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'B': 2495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 2505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (inCharacterClass) 2515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('B'); 2525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else { 2535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.assertionWordBoundary(true); 2545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return false; 2555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 2565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 2575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 2585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // CharacterClassEscape 2595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'd': 2605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 2615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomBuiltInCharacterClass(DigitClassID, false); 2625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 2635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 's': 2645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 2655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomBuiltInCharacterClass(SpaceClassID, false); 2665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 2675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'w': 2685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 2695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomBuiltInCharacterClass(WordClassID, false); 2705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 2715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'D': 2725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 2735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomBuiltInCharacterClass(DigitClassID, true); 2745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 2755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'S': 2765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 2775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomBuiltInCharacterClass(SpaceClassID, true); 2785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 2795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'W': 2805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 2815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomBuiltInCharacterClass(WordClassID, true); 2825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 2835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 2845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // DecimalEscape 2855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '1': 2865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '2': 2875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '3': 2885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '4': 2895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '5': 2905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '6': 2915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '7': 2925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '8': 2935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '9': { 2945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape. 2955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // First, try to parse this as backreference. 2965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (!inCharacterClass) { 2975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ParseState state = saveState(); 2985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 2995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned backReference = consumeNumber(); 3005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (backReference <= m_backReferenceLimit) { 3015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomBackReference(backReference); 3025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian restoreState(state); 3065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // Not a backreference, and not octal. 3095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (peek() >= '8') { 3105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('\\'); 3115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // Fall-through to handle this as an octal escape. 3155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // Octal escape 3185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '0': 3195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter(consumeOctal()); 3205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // ControlEscape 3235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'f': 3245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 3255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('\f'); 3265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'n': 3285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 3295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('\n'); 3305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'r': 3325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 3335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('\r'); 3345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 't': 3365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 3375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('\t'); 3385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'v': 3405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 3415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('\v'); 3425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // ControlLetter 3455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'c': { 3465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ParseState state = saveState(); 3475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 3485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (!atEndOfPattern()) { 3495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian int control = consume(); 3505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // To match Firefox, inside a character class, we also accept numbers and '_' as control characters. 3525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) { 3535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter(control & 0x1f); 3545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian restoreState(state); 3585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('\\'); 3595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // HexEscape 3635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'x': { 3645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 3655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian int x = tryConsumeHex(2); 3665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (x == -1) 3675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('x'); 3685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else 3695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter(x); 3705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // UnicodeEscape 3745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case 'u': { 3755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 3765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian int u = tryConsumeHex(4); 3775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (u == -1) 3785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter('u'); 3795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else 3805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter(u); 3815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 3825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // IdentityEscape 3855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian default: 3865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian delegate.atomPatternCharacter(consume()); 3875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return true; 3905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 3915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 3925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 3935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parseAtomEscape(), parseCharacterClassEscape(): 3945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 3955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * These methods alias to parseEscape(). 3965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 3975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian bool parseAtomEscape() 3985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 3995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return parseEscape<false>(m_delegate); 4005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 4015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void parseCharacterClassEscape(CharacterClassParserDelegate& delegate) 4025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 4035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseEscape<true>(delegate); 4045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 4055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 4075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parseCharacterClass(): 4085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 4095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape) 4105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * to an instance of CharacterClassParserDelegate, to describe the character class to the 4115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * delegate. 4125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 4135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void parseCharacterClass() 4145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 4155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(!m_err); 4165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(peek() == '['); 4175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 4185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err); 4205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian characterClassConstructor.begin(tryConsume('^')); 4225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian while (!atEndOfPattern()) { 4245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian switch (peek()) { 4255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case ']': 4265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 4275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian characterClassConstructor.end(); 4285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return; 4295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '\\': 4315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseCharacterClassEscape(characterClassConstructor); 4325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 4335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian default: 4355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian characterClassConstructor.atomPatternCharacterUnescaped(consume()); 4365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 4375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (m_err) 4395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return; 4405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 4415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = CharacterClassUnmatched; 4435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 4445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 4465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parseParenthesesBegin(): 4475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 4485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns. 4495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 4505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void parseParenthesesBegin() 4515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 4525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(!m_err); 4535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(peek() == '('); 4545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 4555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (tryConsume('?')) { 4575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (atEndOfPattern()) { 4585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = ParenthesesTypeInvalid; 4595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return; 4605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 4615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian switch (consume()) { 4635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case ':': 4645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomParenthesesSubpatternBegin(false); 4655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 4665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '=': 4685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomParentheticalAssertionBegin(); 4695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 4705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '!': 4725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomParentheticalAssertionBegin(true); 4735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 4745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian default: 4765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = ParenthesesTypeInvalid; 4775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 4785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } else 4795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomParenthesesSubpatternBegin(); 4805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ++m_parenthesesNestingDepth; 4825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 4835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 4855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parseParenthesesEnd(): 4865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 4875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses). 4885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 4895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void parseParenthesesEnd() 4905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 4915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(!m_err); 4925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(peek() == ')'); 4935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 4945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 4955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (m_parenthesesNestingDepth > 0) 4965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomParenthesesEnd(); 4975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else 4985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = ParenthesesUnmatched; 4995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian --m_parenthesesNestingDepth; 5015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 5025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 5045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parseQuantifier(): 5055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 5065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers. 5075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 5085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max) 5095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 5105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(!m_err); 5115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(min <= max); 5125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (lastTokenWasAnAtom) 5145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.quantifyAtom(min, max, !tryConsume('?')); 5155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else 5165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = QuantifierWithoutAtom; 5175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 5185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 5205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parseTokens(): 5215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 5225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * This method loops over the input pattern reporting tokens to the delegate. 5235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * The method returns when a parse error is detected, or the end of the pattern 5245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * is reached. One piece of state is tracked around the loop, which is whether 5255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * the last token passed to the delegate was an atom (this is necessary to detect 5265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * a parse error when a quantifier provided without an atom to quantify). 5275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 5285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void parseTokens() 5295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 5305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian bool lastTokenWasAnAtom = false; 5315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian while (!atEndOfPattern()) { 5335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian switch (peek()) { 5345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '|': 5355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 5365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.disjunction(); 5375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = false; 5385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '(': 5415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseParenthesesBegin(); 5425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = false; 5435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case ')': 5465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseParenthesesEnd(); 5475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = true; 5485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '^': 5515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 5525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.assertionBOL(); 5535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = false; 5545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '$': 5575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 5585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.assertionEOL(); 5595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = false; 5605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '.': 5635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 5645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomBuiltInCharacterClass(NewlineClassID, true); 5655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = true; 5665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '[': 5695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseCharacterClass(); 5705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = true; 5715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '\\': 5745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = parseAtomEscape(); 5755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '*': 5785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 5795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX); 5805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = false; 5815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '+': 5845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 5855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX); 5865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = false; 5875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '?': 5905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 5915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseQuantifier(lastTokenWasAnAtom, 0, 1); 5925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = false; 5935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 5945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian case '{': { 5965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ParseState state = saveState(); 5975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 5985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 5995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (peekIsDigit()) { 6005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned min = consumeNumber(); 6015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned max = min; 6025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (tryConsume(',')) 6045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian max = peekIsDigit() ? consumeNumber() : UINT_MAX; 6055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (tryConsume('}')) { 6075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (min <= max) 6085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseQuantifier(lastTokenWasAnAtom, min, max); 6095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else 6105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = QuantifierOutOfOrder; 6115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = false; 6125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian break; 6135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian restoreState(state); 6175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } // if we did not find a complete quantifer, fall through to the default case. 6185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian default: 6205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.atomPatternCharacter(consume()); 6215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian lastTokenWasAnAtom = true; 6225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (m_err) 6255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return; 6265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (m_parenthesesNestingDepth > 0) 6295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = MissingParentheses; 6305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian /* 6335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parse(): 6345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 6355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * This method calls regexBegin(), calls parseTokens() to parse over the input 6365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * patterns, calls regexEnd() or regexError() as appropriate, and converts any 6375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * error code to a const char* for a result. 6385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 6395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian const char* parse() 6405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 6415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.regexBegin(); 6425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (m_size > MAX_PATTERN_SIZE) 6445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_err = PatternTooLarge; 6455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else 6465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian parseTokens(); 6475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(atEndOfPattern() || m_err); 6485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (m_err) 6505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.regexError(); 6515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian else 6525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_delegate.regexEnd(); 6535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // The order of this array must match the ErrorCode enum. 6555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian static const char* errorMessages[NumberOfErrorCodes] = { 6565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 0, // NoError 6575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian "regular expression too large", 6585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian "numbers out of order in {} quantifier", 6595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian "nothing to repeat", 6605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian "missing )", 6615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian "unmatched parentheses", 6625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian "unrecognized character after (?", 6635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian "missing terminating ] for character class", 6645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian "range out of order in character class", 6655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian "\\ at end of pattern" 6665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian }; 6675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return errorMessages[m_err]; 6695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // Misc helper functions: 6735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian typedef unsigned ParseState; 6755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ParseState saveState() 6775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 6785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return m_index; 6795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian void restoreState(ParseState state) 6825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 6835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian m_index = state; 6845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian bool atEndOfPattern() 6875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 6885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(m_index <= m_size); 6895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return m_index == m_size; 6905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian int peek() 6935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 6945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(m_index < m_size); 6955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return m_data[m_index]; 6965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 6975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 6985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian bool peekIsDigit() 6995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 7005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return !atEndOfPattern() && WTF::isASCIIDigit(peek()); 7015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned peekDigit() 7045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 7055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(peekIsDigit()); 7065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return peek() - '0'; 7075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian int consume() 7105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 7115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(m_index < m_size); 7125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return m_data[m_index++]; 7135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned consumeDigit() 7165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 7175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(peekIsDigit()); 7185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return consume() - '0'; 7195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned consumeNumber() 7225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 7235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned n = consumeDigit(); 7245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // check for overflow. 7255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) { 7265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian n = newValue; 7275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian consume(); 7285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return n; 7305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned consumeOctal() 7335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 7345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ASSERT(WTF::isASCIIOctalDigit(peek())); 7355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned n = consumeDigit(); 7375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek())) 7385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian n = n * 8 + consumeDigit(); 7395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return n; 7405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian bool tryConsume(UChar ch) 7435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 7445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (atEndOfPattern() || (m_data[m_index] != ch)) 7455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return false; 7465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ++m_index; 7475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return true; 7485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian int tryConsumeHex(int count) 7515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian { 7525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ParseState state = saveState(); 7535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian int n = 0; 7555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian while (count--) { 7565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) { 7575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian restoreState(state); 7585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return -1; 7595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian n = (n << 4) | WTF::toASCIIHexValue(consume()); 7615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return n; 7635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian } 7645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian Delegate& m_delegate; 7665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned m_backReferenceLimit; 7675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian ErrorCode m_err; 7685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian const UChar* m_data; 7695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned m_size; 7705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned m_index; 7715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian unsigned m_parenthesesNestingDepth; 7725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian // Derived by empirical testing of compile time in PCRE and WREC. 7745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian static const unsigned MAX_PATTERN_SIZE = 1024 * 1024; 7755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian}; 7765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 7775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian/* 7785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Yarr::parse(): 7795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 7805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * The parse method is passed a pattern to be parsed and a delegate upon which 7815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * callbacks will be made to record the parsed tokens forming the regex. 7825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Yarr::parse() returns null on success, or a const C string providing an error 7835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * message where a parse error occurs. 7845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 7855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * The Delegate must implement the following interface: 7865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 7875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void assertionBOL(); 7885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void assertionEOL(); 7895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void assertionWordBoundary(bool invert); 7905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 7915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomPatternCharacter(UChar ch); 7925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert); 7935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomCharacterClassBegin(bool invert) 7945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomCharacterClassAtom(UChar ch) 7955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomCharacterClassRange(UChar begin, UChar end) 7965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert) 7975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomCharacterClassEnd() 7985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomParenthesesSubpatternBegin(bool capture = true); 7995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomParentheticalAssertionBegin(bool invert = false); 8005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomParenthesesEnd(); 8015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void atomBackReference(unsigned subpatternId); 8025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 8035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void quantifyAtom(unsigned min, unsigned max, bool greedy); 8045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 8055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void disjunction(); 8065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 8075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void regexBegin(); 8085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void regexEnd(); 8095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * void regexError(); 8105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 8115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Before any call recording tokens are made, regexBegin() will be called on the 8125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * delegate once. Once parsing is complete either regexEnd() or regexError() will 8135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * be called, as appropriate. 8145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 8155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * The regular expression is described by a sequence of assertion*() and atom*() 8165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * callbacks to the delegate, describing the terms in the regular expression. 8175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Following an atom a quantifyAtom() call may occur to indicate that the previous 8185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atom should be quantified. In the case of atoms described across multiple 8195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * calls (parentheses and character classes) the call to quantifyAtom() will come 8205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * after the call to the atom*End() method, never after atom*Begin(). 8215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 8225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Character classes may either be described by a single call to 8235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls. 8245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * In the latter case, ...Begin() will be called, followed by a sequence of 8255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End(). 8265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 8275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Sequences of atoms and assertions are broken into alternatives via calls to 8285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * disjunction(). Assertions, atoms, and disjunctions emitted between calls to 8295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern. 8305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atomParenthesesBegin() is passed a subpatternId. In the case of a regular 8315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * capturing subpattern, this will be the subpatternId associated with these 8325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parentheses, and will also by definition be the lowest subpatternId of these 8335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parentheses and of any nested paretheses. The atomParenthesesEnd() method 8345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * is passed the subpatternId of the last capturing subexpression nested within 8355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * these paretheses. In the case of a capturing subpattern with no nested 8365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * capturing subpatterns, the same subpatternId will be passed to the begin and 8375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * end functions. In the case of non-capturing subpatterns the subpatternId 8385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * passed to the begin method is also the first possible subpatternId that might 8395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * be nested within these paretheses. If a set of non-capturing parentheses does 8405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * not contain any capturing subpatterns, then the subpatternId passed to begin 8415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * will be greater than the subpatternId passed to end. 8425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */ 8435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 8445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qiantemplate<class Delegate> 8455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qianconst char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX) 8465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian{ 8475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse(); 8485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian} 8495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 8505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian} } // namespace JSC::Yarr 8515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 8525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#endif 8535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian 8545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#endif // RegexParser_h 855