14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Secret Labs' Regular Expression Engine
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# re-compatible interface for the sre matching engine
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# This version of the SRE library can be redistributed under CNRI's
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Python 1.6 license.  For any other use, please contact Secret Labs
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# AB (info@pythonware.com).
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Portions of this engine have been developed in cooperation with
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# other compatibility work.
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmr"""Support for regular expressions (RE).
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThis module provides regular expression matching operations similar to
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthose found in Perl.  It supports both 8-bit and Unicode strings; both
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthe pattern and the strings being processed can contain null bytes and
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcharacters outside the US ASCII range.
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmRegular expressions can contain both special and ordinary characters.
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmMost ordinary characters, like "A", "a", or "0", are the simplest
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmregular expressions; they simply match themselves.  You can
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmconcatenate ordinary characters, so last matches the string 'last'.
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe special characters are:
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    "."      Matches any character except a newline.
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    "^"      Matches the start of the string.
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    "$"      Matches the end of the string or just before the newline at
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             the end of the string.
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    "*"      Matches 0 or more (greedy) repetitions of the preceding RE.
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             Greedy means that it will match as many repetitions as possible.
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    "+"      Matches 1 or more (greedy) repetitions of the preceding RE.
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    "?"      Matches 0 or 1 (greedy) of the preceding RE.
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    *?,+?,?? Non-greedy versions of the previous three special characters.
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    {m,n}    Matches from m to n repetitions of the preceding RE.
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    {m,n}?   Non-greedy version of the above.
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    "\\"     Either escapes special characters or signals a special sequence.
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    []       Indicates a set of characters.
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             A "^" as the first character indicates a complementing set.
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    "|"      A|B, creates an RE that will match either A or B.
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (...)    Matches the RE inside the parentheses.
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             The contents can be retrieved or matched later in the string.
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?:...)  Non-grouping version of regular parentheses.
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?P<name>...) The substring matched by the group is accessible by name.
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?P=name)     Matches the text matched earlier by the group named name.
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?#...)  A comment; ignored.
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?=...)  Matches if ... matches next, but doesn't consume the string.
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?!...)  Matches if ... doesn't match next.
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?<=...) Matches if preceded by ... (must be fixed length).
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?<!...) Matches if not preceded by ... (must be fixed length).
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (?(id/name)yes|no) Matches yes pattern if the group with id/name matched,
574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       the (optional) no pattern otherwise.
584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe special sequences consist of "\\" and a character from the list
604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmbelow.  If the ordinary character is not on the list, then the
614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmresulting RE will match the second character.
624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \number  Matches the contents of the group of the same number.
634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \A       Matches only at the start of the string.
644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \Z       Matches only at the end of the string.
654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \b       Matches the empty string, but only at the start or end of a word.
664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \B       Matches the empty string, but not at the start or end of a word.
674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \d       Matches any decimal digit; equivalent to the set [0-9].
684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \D       Matches any non-digit character; equivalent to the set [^0-9].
694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \s       Matches any whitespace character; equivalent to [ \t\n\r\f\v].
704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \S       Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v].
714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \w       Matches any alphanumeric character; equivalent to [a-zA-Z0-9_].
724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             With LOCALE, it will match the set [0-9_] plus characters defined
734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             as letters for the current locale.
744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \W       Matches the complement of \w.
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \\       Matches a literal backslash.
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThis module exports the following functions:
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    match    Match a regular expression pattern to the beginning of a string.
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    search   Search a string for the presence of a pattern.
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    sub      Substitute occurrences of a pattern found in a string.
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    subn     Same as sub, but also return the number of substitutions made.
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    split    Split a string by the occurrences of a pattern.
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    findall  Find all occurrences of a pattern in a string.
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    finditer Return an iterator yielding a match object for each match.
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    compile  Compile a pattern into a RegexObject.
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    purge    Clear the regular expression cache.
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    escape   Backslash all non-alphanumerics in a string.
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
Some of the functions in this module take flags as optional parameters:
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    I  IGNORECASE  Perform case-insensitive matching.
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    L  LOCALE      Make \w, \W, \b, \B, dependent on the current locale.
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    M  MULTILINE   "^" matches the beginning of lines (after a newline)
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   as well as the string.
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   "$" matches the end of lines (before a newline) as well
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                   as the end of the string.
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    S  DOTALL      "." matches any character at all, including the newline.
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    U  UNICODE     Make \w, \W, \b, \B, dependent on the Unicode locale.
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThis module also defines an exception 'error'.
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport sys
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport sre_compile
1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport sre_parse
1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# public symbols
# Names exported by a star-import of this module.  "finditer" is not
# listed here; it is appended further down, guarded by an interpreter
# version check.  T, TEMPLATE and DEBUG are defined below but are not
# part of this list.
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
    "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
    "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
    "UNICODE", "error" ]

__version__ = "2.2.1"

# flags
# Each flag is taken from sre_compile and bound to two names at once: a
# one-letter alias and a descriptive long name.
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments

# sre extensions (experimental, don't rely on these)
# T is OR-ed into the flags by template() below.
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation

# sre exception
# Same object as sre_compile.error; _compile() and _compile_repl() catch
# and re-raise it.
error = sre_compile.error
1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# public interface
1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def match(pattern, string, flags=0):
    """Apply the pattern at the start of the string.

    The pattern is compiled (or fetched from the cache) with the given
    flags and its match() method is applied to string.  The result is
    a match object, or None if no match was found."""
    compiled = _compile(pattern, flags)
    return compiled.match(string)
1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def search(pattern, string, flags=0):
    """Scan through the string looking for a match to the pattern.

    The pattern is compiled (or fetched from the cache) with the given
    flags and its search() method is applied to string.  The result
    is a match object, or None if no match was found."""
    compiled = _compile(pattern, flags)
    return compiled.search(string)
1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def sub(pattern, repl, string, count=0, flags=0):
    """Replace occurrences of the pattern in string by repl.

    Returns the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string.  repl may
    be a string, in which case backslash escapes in it are processed,
    or a callable, which is passed the match object and must return
    the replacement string to be used.  count and flags are handed on
    to the compiled pattern unchanged."""
    compiled = _compile(pattern, flags)
    return compiled.sub(repl, string, count)
1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def subn(pattern, repl, string, count=0, flags=0):
    """Like sub(), but also report how many replacements were made.

    Returns a 2-tuple (new_string, number).  new_string is the string
    obtained by replacing the leftmost non-overlapping occurrences of
    the pattern in the source string by repl, and number is the
    number of substitutions that were made.  repl may be a string, in
    which case backslash escapes in it are processed, or a callable,
    which is passed the match object and must return the replacement
    string to be used."""
    compiled = _compile(pattern, flags)
    return compiled.subn(repl, string, count)
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def split(pattern, string, maxsplit=0, flags=0):
    """Split the source string by the occurrences of the pattern.

    Returns a list containing the resulting substrings.  maxsplit is
    forwarded unchanged to the compiled pattern's split() method."""
    compiled = _compile(pattern, flags)
    return compiled.split(string, maxsplit)
1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def findall(pattern, string, flags=0):
    """Return a list of all non-overlapping matches in the string.

    When the pattern contains one or more groups, the list holds the
    groups instead; its items are tuples if the pattern has more than
    one group.

    Empty matches are included in the result."""
    compiled = _compile(pattern, flags)
    return compiled.findall(string)
1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# finditer is only defined (and exported) on interpreters whose
# sys.hexversion is at least 0x02020000.
if sys.hexversion >= 0x02020000:
    __all__.append("finditer")
    def finditer(pattern, string, flags=0):
        """Return an iterator over all non-overlapping matches in the
        string.

        Each item produced by the iterator is a match object.

        Empty matches are included in the result."""
        compiled = _compile(pattern, flags)
        return compiled.finditer(string)
1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def compile(pattern, flags=0):
    """Compile a regular expression pattern, returning a pattern object.

    This is a thin public wrapper around the internal _compile()."""
    compiled = _compile(pattern, flags)
    return compiled
1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def purge():
    """Clear the regular expression cache.

    Both the compiled-pattern cache and the replacement-template cache
    are emptied."""
    for cache in (_cache, _cache_repl):
        cache.clear()
1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def template(pattern, flags=0):
    """Compile a template pattern, returning a pattern object.

    Same as compile(), except that the T (TEMPLATE) flag is OR-ed into
    the supplied flags first."""
    template_flags = flags | T
    return _compile(pattern, template_flags)
2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# Lookup table of the characters escape() leaves untouched: the ASCII
# letters and digits, each mapped to 1.
_alphanum = dict.fromkeys(
    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890', 1)

def escape(pattern):
    """Escape all non-alphanumeric characters in pattern.

    Every character that is not an ASCII letter or digit is prefixed
    with a backslash; a null character is written as the four
    characters backslash, 0, 0, 0.  The result is joined with an empty
    slice of pattern, so it has the same string type as the input."""
    keep = _alphanum
    pieces = []
    for ch in pattern:
        if ch in keep:
            pieces.append(ch)
        elif ch == "\000":
            pieces.append("\\000")
        else:
            pieces.append("\\" + ch)
    return pattern[:0].join(pieces)
2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# internals
2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# Cache of compiled patterns, filled by _compile() and keyed by
# (type(pattern), pattern, flags).
_cache = {}
# Cache of parsed replacement templates, filled by _compile_repl() and
# keyed by (repl, pattern).
_cache_repl = {}

# Type of the objects returned by sre_compile.compile(); _compile() uses
# it to recognise already-compiled patterns, and it is the type
# registered for pickling further down.
_pattern_type = type(sre_compile.compile("", 0))

# Size limit shared by both caches: once a cache holds this many entries
# it is emptied completely before the next entry is stored.
_MAXCACHE = 100
2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _compile(*key):
    # internal: compile pattern
    #
    # key is the pair (pattern, flags).  Returns a compiled pattern
    # object, taken from _cache when the same key was compiled before.
    #
    # Raises ValueError if pattern is already compiled and flags is
    # non-zero, TypeError if pattern is neither a string nor a compiled
    # pattern, and error if the expression cannot be compiled.

    # The pattern's type is part of the cache key, so patterns that
    # compare equal but have different types get separate entries.
    cachekey = (type(key[0]),) + key
    p = _cache.get(cachekey)
    if p is not None:
        return p
    pattern, flags = key
    if isinstance(pattern, _pattern_type):
        # already compiled: hand it back as-is (it is not cached)
        if flags:
            raise ValueError('Cannot process flags argument with a compiled pattern')
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    try:
        p = sre_compile.compile(pattern, flags)
    except error as v:
        raise v # invalid expression
    # The cache is not trimmed entry by entry; when full it is dropped
    # wholesale and rebuilt from subsequent calls.
    if len(_cache) >= _MAXCACHE:
        _cache.clear()
    _cache[cachekey] = p
    return p
2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _compile_repl(*key):
    # internal: compile replacement pattern
    #
    # key is the pair (repl, pattern).  Returns the result of
    # sre_parse.parse_template(repl, pattern), taken from _cache_repl
    # when the same key was parsed before.  Raises error if the
    # template cannot be parsed.
    p = _cache_repl.get(key)
    if p is not None:
        return p
    repl, pattern = key
    try:
        p = sre_parse.parse_template(repl, pattern)
    except error as v:
        raise v # invalid expression
    # Same eviction policy as _compile(): drop everything once full.
    if len(_cache_repl) >= _MAXCACHE:
        _cache_repl.clear()
    _cache_repl[key] = p
    return p
2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _expand(pattern, match, template):
    # internal: match.expand implementation hook
    #
    # Parses the template against pattern on every call (the
    # replacement cache is not consulted here) and expands the parsed
    # form using the given match.
    parsed = sre_parse.parse_template(template, pattern)
    return sre_parse.expand_template(parsed, match)
2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _subx(pattern, template):
    # internal: pattern.sub/subn implementation helper
    #
    # Returns either a plain replacement value or a callable taking a
    # match object, depending on the shape of the parsed template.
    parsed = _compile_repl(template, pattern)
    head, literals = parsed[0], parsed[1]
    if not head and len(literals) == 1:
        # literal replacement: nothing to expand per match, so the
        # single literal itself is returned
        return literals[0]
    # The parsed template is bound as a default argument so the returned
    # callable carries it along.
    def filter(match, template=parsed):
        return sre_parse.expand_template(template, match)
    return filter
2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# register myself for pickling
2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport copy_reg
2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
def _pickle(p):
    # Reduction function for compiled patterns: a pattern is rebuilt by
    # calling _compile() with its source pattern and flags.
    rebuild_args = (p.pattern, p.flags)
    return _compile, rebuild_args

# Register the reduction function for the compiled-pattern type, with
# _compile as the constructor.
copy_reg.pickle(_pattern_type, _pickle, _compile)
2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# --------------------------------------------------------------------
2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# experimental stuff (see python-dev discussions for details)
2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
class Scanner:
    """Experimental tokenizer built from a list of (phrase, action) pairs.

    All phrases are combined into one compound pattern, one numbered
    group per phrase, so that the group that matched identifies which
    lexicon entry applies."""

    def __init__(self, lexicon, flags=0):
        """Build the compound pattern from lexicon.

        lexicon is a sequence of (phrase, action) pairs; it is kept on
        the instance because scan() indexes into it.  flags is used
        both for parsing each phrase and for the shared parser state."""
        from sre_constants import BRANCH, SUBPATTERN
        self.lexicon = lexicon
        # combine phrases into a compound pattern
        state = sre_parse.Pattern()
        state.flags = flags
        alternatives = []
        # Group numbers start at 1 and follow the lexicon order.
        for group_id, (phrase, action) in enumerate(lexicon, 1):
            parsed = sre_parse.parse(phrase, flags)
            alternatives.append(
                sre_parse.SubPattern(state, [(SUBPATTERN, (group_id, parsed))]))
        state.groups = len(alternatives) + 1
        branch = sre_parse.SubPattern(state, [(BRANCH, (None, alternatives))])
        self.scanner = sre_compile.compile(branch)

    def scan(self, string):
        """Tokenize string, returning (results, remainder).

        Matching is repeated from where the previous match ended and
        stops at the first position where nothing matches or where the
        match is empty.  For each match the action of the matching
        lexicon entry is looked up: a callable action is called with
        (self, matched_text) after self.match has been set to the
        match object, and its return value takes the action's place.
        Whatever is left is appended to the results unless it is None.
        remainder is the unscanned tail of string."""
        tokens = []
        next_match = self.scanner.scanner(string).match
        pos = 0
        # iter() with a None sentinel stops as soon as no match is found.
        for found in iter(next_match, None):
            end = found.end()
            if end == pos:
                # empty match: stop rather than loop forever
                break
            token = self.lexicon[found.lastindex - 1][1]
            if hasattr(token, '__call__'):
                self.match = found
                token = token(self, found.group())
            if token is not None:
                tokens.append(token)
            pos = end
        return tokens, string[pos:]
327