10a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
20a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Secret Labs' Regular Expression Engine
30a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
40a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# convert template to internal format
50a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
60a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
70a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
80a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# See the sre.py file for information on usage and redistribution.
90a8c90248264a8b26970b4473770bcc3df8515fJosh Gao#
100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao"""Internal support module for sre"""
120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
130a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport _sre, sys
140a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoimport sre_parse
150a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom sre_constants import *
160a8c90248264a8b26970b4473770bcc3df8515fJosh Gaofrom _sre import MAXREPEAT
170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
180a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoassert _sre.MAGIC == MAGIC, "SRE module mismatch"
190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
200a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoif _sre.CODESIZE == 2:
210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    MAXCODE = 65535
220a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelse:
230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    MAXCODE = 0xFFFFFFFFL
240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
250a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _identityfunction(x):
260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return x
270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_LITERAL_CODES = set([LITERAL, NOT_LITERAL])
290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT])
300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_SUCCESS_CODES = set([SUCCESS, FAILURE])
310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao_ASSERT_CODES = set([ASSERT, ASSERT_NOT])
320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
330a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _compile(code, pattern, flags):
340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # internal: compile a (sub)pattern
350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    emit = code.append
360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    _len = len
370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    LITERAL_CODES = _LITERAL_CODES
380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    REPEATING_CODES = _REPEATING_CODES
390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    SUCCESS_CODES = _SUCCESS_CODES
400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    ASSERT_CODES = _ASSERT_CODES
410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for op, av in pattern:
420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if op in LITERAL_CODES:
430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if flags & SRE_FLAG_IGNORECASE:
440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[OP_IGNORE[op]])
450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(_sre.getlower(av, flags))
460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[op])
480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(av)
490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is IN:
500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if flags & SRE_FLAG_IGNORECASE:
510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[OP_IGNORE[op]])
520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                def fixup(literal, flags=flags):
530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    return _sre.getlower(literal, flags)
540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[op])
560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                fixup = _identityfunction
570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            skip = _len(code); emit(0)
580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            _compile_charset(av, flags, code, fixup)
590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            code[skip] = _len(code) - skip
600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is ANY:
610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if flags & SRE_FLAG_DOTALL:
620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[ANY_ALL])
630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[ANY])
650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op in REPEATING_CODES:
660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if flags & SRE_FLAG_TEMPLATE:
670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                raise error, "internal: unsupported template operator"
680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[REPEAT])
690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                skip = _len(code); emit(0)
700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(av[0])
710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(av[1])
720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                _compile(code, av[2], flags)
730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[SUCCESS])
740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                code[skip] = _len(code) - skip
750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif _simple(av) and op is not REPEAT:
760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if op is MAX_REPEAT:
770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    emit(OPCODES[REPEAT_ONE])
780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    emit(OPCODES[MIN_REPEAT_ONE])
800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                skip = _len(code); emit(0)
810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(av[0])
820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(av[1])
830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                _compile(code, av[2], flags)
840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[SUCCESS])
850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                code[skip] = _len(code) - skip
860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[REPEAT])
880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                skip = _len(code); emit(0)
890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(av[0])
900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(av[1])
910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                _compile(code, av[2], flags)
920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                code[skip] = _len(code) - skip
930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if op is MAX_REPEAT:
940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    emit(OPCODES[MAX_UNTIL])
950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    emit(OPCODES[MIN_UNTIL])
970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is SUBPATTERN:
980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if av[0]:
990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[MARK])
1000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit((av[0]-1)*2)
1010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # _compile_info(code, av[1], flags)
1020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            _compile(code, av[1], flags)
1030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if av[0]:
1040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[MARK])
1050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit((av[0]-1)*2+1)
1060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op in SUCCESS_CODES:
1070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(OPCODES[op])
1080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op in ASSERT_CODES:
1090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(OPCODES[op])
1100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            skip = _len(code); emit(0)
1110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if av[0] >= 0:
1120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(0) # look ahead
1130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
1140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                lo, hi = av[1].getwidth()
1150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if lo != hi:
1160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    raise error, "look-behind requires fixed-width pattern"
1170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(lo) # look behind
1180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            _compile(code, av[1], flags)
1190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(OPCODES[SUCCESS])
1200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            code[skip] = _len(code) - skip
1210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is CALL:
1220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(OPCODES[op])
1230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            skip = _len(code); emit(0)
1240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            _compile(code, av, flags)
1250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(OPCODES[SUCCESS])
1260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            code[skip] = _len(code) - skip
1270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is AT:
1280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(OPCODES[op])
1290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if flags & SRE_FLAG_MULTILINE:
1300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                av = AT_MULTILINE.get(av, av)
1310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if flags & SRE_FLAG_LOCALE:
1320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                av = AT_LOCALE.get(av, av)
1330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif flags & SRE_FLAG_UNICODE:
1340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                av = AT_UNICODE.get(av, av)
1350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(ATCODES[av])
1360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is BRANCH:
1370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(OPCODES[op])
1380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tail = []
1390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            tailappend = tail.append
1400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for av in av[1]:
1410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                skip = _len(code); emit(0)
1420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # _compile_info(code, av, flags)
1430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                _compile(code, av, flags)
1440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[JUMP])
1450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                tailappend(_len(code)); emit(0)
1460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                code[skip] = _len(code) - skip
1470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(0) # end of branch
1480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            for tail in tail:
1490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                code[tail] = _len(code) - tail
1500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is CATEGORY:
1510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(OPCODES[op])
1520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if flags & SRE_FLAG_LOCALE:
1530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                av = CH_LOCALE[av]
1540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif flags & SRE_FLAG_UNICODE:
1550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                av = CH_UNICODE[av]
1560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(CHCODES[av])
1570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is GROUPREF:
1580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if flags & SRE_FLAG_IGNORECASE:
1590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[OP_IGNORE[op]])
1600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
1610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[op])
1620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(av-1)
1630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is GROUPREF_EXISTS:
1640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(OPCODES[op])
1650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(av[0]-1)
1660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            skipyes = _len(code); emit(0)
1670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            _compile(code, av[1], flags)
1680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if av[2]:
1690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(OPCODES[JUMP])
1700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                skipno = _len(code); emit(0)
1710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                code[skipyes] = _len(code) - skipyes + 1
1720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                _compile(code, av[2], flags)
1730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                code[skipno] = _len(code) - skipno
1740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
1750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                code[skipyes] = _len(code) - skipyes + 1
1760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
1770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise ValueError, ("unsupported operand type", op)
1780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
1790a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _compile_charset(charset, flags, code, fixup=None):
1800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # compile charset subprogram
1810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    emit = code.append
1820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if fixup is None:
1830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        fixup = _identityfunction
1840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for op, av in _optimize_charset(charset, fixup):
1850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        emit(OPCODES[op])
1860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if op is NEGATE:
1870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            pass
1880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is LITERAL:
1890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(fixup(av))
1900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is RANGE:
1910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(fixup(av[0]))
1920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            emit(fixup(av[1]))
1930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is CHARSET:
1940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            code.extend(av)
1950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is BIGCHARSET:
1960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            code.extend(av)
1970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif op is CATEGORY:
1980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if flags & SRE_FLAG_LOCALE:
1990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(CHCODES[CH_LOCALE[av]])
2000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif flags & SRE_FLAG_UNICODE:
2010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(CHCODES[CH_UNICODE[av]])
2020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
2030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                emit(CHCODES[av])
2040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        else:
2050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            raise error, "internal: unsupported set operator"
2060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    emit(OPCODES[FAILURE])
2070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2080a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _optimize_charset(charset, fixup):
2090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # internal: optimize character set
2100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    out = []
2110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    outappend = out.append
2120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    charmap = [0]*256
2130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    try:
2140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for op, av in charset:
2150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if op is NEGATE:
2160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                outappend((op, av))
2170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif op is LITERAL:
2180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                charmap[fixup(av)] = 1
2190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif op is RANGE:
2200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                for i in range(fixup(av[0]), fixup(av[1])+1):
2210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    charmap[i] = 1
2220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif op is CATEGORY:
2230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # XXX: could append to charmap tail
2240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return charset # cannot compress
2250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    except IndexError:
2260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # character set contains unicode characters
2270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return _optimize_unicode(charset, fixup)
2280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # compress character map
2290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    i = p = n = 0
2300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    runs = []
2310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    runsappend = runs.append
2320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for c in charmap:
2330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if c:
2340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if n == 0:
2350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                p = i
2360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            n = n + 1
2370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        elif n:
2380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            runsappend((p, n))
2390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            n = 0
2400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        i = i + 1
2410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if n:
2420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        runsappend((p, n))
2430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if len(runs) <= 2:
2440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # use literal/range
2450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for p, n in runs:
2460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if n == 1:
2470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                outappend((LITERAL, p))
2480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
2490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                outappend((RANGE, (p, p+n-1)))
2500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(out) < len(charset):
2510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return out
2520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
2530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # use bitmap
2540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        data = _mk_bitmap(charmap)
2550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        outappend((CHARSET, data))
2560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return out
2570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return charset
2580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2590a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _mk_bitmap(bits):
2600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    data = []
2610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    dataappend = data.append
2620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _sre.CODESIZE == 2:
2630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        start = (1, 0)
2640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
2650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        start = (1L, 0L)
2660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    m, v = start
2670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for c in bits:
2680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if c:
2690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            v = v + m
2700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        m = m + m
2710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if m > MAXCODE:
2720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            dataappend(v)
2730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            m, v = start
2740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return data
2750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# To represent a big charset, first a bitmap of all characters in the
2770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# set is constructed. Then, this bitmap is sliced into chunks of 256
2780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# characters, duplicate chunks are eliminated, and each chunk is
2790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# given a number. In the compiled expression, the charset is
2800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# represented by a 16-bit word sequence, consisting of one word for
2810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# the number of different chunks, a sequence of 256 bytes (128 words)
2820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# of chunk numbers indexed by their original chunk position, and a
2830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# sequence of chunks (16 words each).
2840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Compression is normally good: in a typical charset, large ranges of
2860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Unicode will be either completely excluded (e.g. if only cyrillic
2870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# letters are to be matched), or completely included (e.g. if large
2880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# subranges of Kanji match). These ranges will be represented by
2890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# chunks of all one-bits or all zero-bits.
2900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# Matching can be also done efficiently: the more significant byte of
2920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# the Unicode character is an index into the chunk number, and the
2930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# less significant byte is a bit index in the chunk (just like the
2940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# CHARSET matching).
2950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
2960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
2970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# of the basic multilingual plane; an efficient representation
2980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# for all of UTF-16 has not yet been developed. This means,
2990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# in particular, that negated charsets cannot be represented as
3000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao# bigcharsets.
3010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3020a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _optimize_unicode(charset, fixup):
3030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    try:
3040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        import array
3050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    except ImportError:
3060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return charset
3070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    charmap = [0]*65536
3080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    negate = 0
3090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    try:
3100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for op, av in charset:
3110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if op is NEGATE:
3120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                negate = 1
3130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif op is LITERAL:
3140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                charmap[fixup(av)] = 1
3150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif op is RANGE:
3160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                for i in xrange(fixup(av[0]), fixup(av[1])+1):
3170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    charmap[i] = 1
3180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif op is CATEGORY:
3190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                # XXX: could expand category
3200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                return charset # cannot compress
3210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    except IndexError:
3220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # non-BMP characters
3230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return charset
3240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if negate:
3250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if sys.maxunicode != 65535:
3260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            # XXX: negation does not work with big charsets
3270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return charset
3280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for i in xrange(65536):
3290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            charmap[i] = not charmap[i]
3300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    comps = {}
3310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    mapping = [0]*256
3320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    block = 0
3330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    data = []
3340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for i in xrange(256):
3350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        chunk = tuple(charmap[i*256:(i+1)*256])
3360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        new = comps.setdefault(chunk, block)
3370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        mapping[i] = new
3380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if new == block:
3390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            block = block + 1
3400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            data = data + _mk_bitmap(chunk)
3410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    header = [block]
3420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if _sre.CODESIZE == 2:
3430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        code = 'H'
3440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
3450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        code = 'I'
3460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Convert block indices to byte array of 256 bytes
3470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    mapping = array.array('b', mapping).tostring()
3480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # Convert byte array to word array
3490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    mapping = array.array(code, mapping)
3500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    assert mapping.itemsize == _sre.CODESIZE
3510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    header = header + mapping.tolist()
3520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    data[0:0] = header
3530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return [(BIGCHARSET, data)]
3540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3550a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _simple(av):
3560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # check if av is a "simple" operator
3570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    lo, hi = av[2].getwidth()
3580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if lo == 0 and hi == MAXREPEAT:
3590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        raise error, "nothing to repeat"
3600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return lo == hi == 1 and av[2][0][0] != SUBPATTERN
3610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
3620a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _compile_info(code, pattern, flags):
3630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # internal: compile an info block.  in the current version,
3640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # this contains min/max pattern width, and an optional literal
3650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # prefix or a character map
3660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    lo, hi = pattern.getwidth()
3670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if lo == 0:
3680a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        return # not worth it
3690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # look for a literal prefix
3700a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    prefix = []
3710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    prefixappend = prefix.append
3720a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    prefix_skip = 0
3730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    charset = [] # not used
3740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    charsetappend = charset.append
3750a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if not (flags & SRE_FLAG_IGNORECASE):
3760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # look for literal prefix
3770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for op, av in pattern.data:
3780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if op is LITERAL:
3790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if len(prefix) == prefix_skip:
3800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    prefix_skip = prefix_skip + 1
3810a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                prefixappend(av)
3820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif op is SUBPATTERN and len(av[1]) == 1:
3830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                op, av = av[1][0]
3840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if op is LITERAL:
3850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    prefixappend(av)
3860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
3870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    break
3880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            else:
3890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                break
3900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # if no prefix, look for charset prefix
3910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if not prefix and pattern.data:
3920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            op, av = pattern.data[0]
3930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            if op is SUBPATTERN and av[1]:
3940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                op, av = av[1][0]
3950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                if op is LITERAL:
3960a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    charsetappend((op, av))
3970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                elif op is BRANCH:
3980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    c = []
3990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    cappend = c.append
4000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    for p in av[1]:
4010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        if not p:
4020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                            break
4030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        op, av = p[0]
4040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        if op is LITERAL:
4050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                            cappend((op, av))
4060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        else:
4070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                            break
4080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    else:
4090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        charset = c
4100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif op is BRANCH:
4110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                c = []
4120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                cappend = c.append
4130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                for p in av[1]:
4140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if not p:
4150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        break
4160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    op, av = p[0]
4170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    if op is LITERAL:
4180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        cappend((op, av))
4190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    else:
4200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                        break
4210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                else:
4220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                    charset = c
4230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            elif op is IN:
4240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                charset = av
4250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##     if prefix:
4260a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##         print "*** PREFIX", prefix, prefix_skip
4270a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##     if charset:
4280a8c90248264a8b26970b4473770bcc3df8515fJosh Gao##         print "*** CHARSET", charset
4290a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # add an info block
4300a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    emit = code.append
4310a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    emit(OPCODES[INFO])
4320a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    skip = len(code); emit(0)
4330a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # literal flag
4340a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    mask = 0
4350a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if prefix:
4360a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        mask = SRE_INFO_PREFIX
4370a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if len(prefix) == prefix_skip == len(pattern.data):
4380a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            mask = mask + SRE_INFO_LITERAL
4390a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    elif charset:
4400a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        mask = mask + SRE_INFO_CHARSET
4410a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    emit(mask)
4420a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # pattern length
4430a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if lo < MAXCODE:
4440a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        emit(lo)
4450a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
4460a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        emit(MAXCODE)
4470a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        prefix = prefix[:MAXCODE]
4480a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if hi < MAXCODE:
4490a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        emit(hi)
4500a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
4510a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        emit(0)
4520a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # add literal prefix
4530a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if prefix:
4540a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        emit(len(prefix)) # length
4550a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        emit(prefix_skip) # skip
4560a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        code.extend(prefix)
4570a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        # generate overlap table
4580a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        table = [-1] + ([0]*len(prefix))
4590a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        for i in xrange(len(prefix)):
4600a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            table[i+1] = table[i]+1
4610a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
4620a8c90248264a8b26970b4473770bcc3df8515fJosh Gao                table[i+1] = table[table[i+1]-1]+1
4630a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        code.extend(table[1:]) # don't store first entry
4640a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    elif charset:
4650a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        _compile_charset(charset, flags, code)
4660a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    code[skip] = len(code) - skip
4670a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4680a8c90248264a8b26970b4473770bcc3df8515fJosh Gaotry:
4690a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    unicode
4700a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoexcept NameError:
4710a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    STRING_TYPES = (type(""),)
4720a8c90248264a8b26970b4473770bcc3df8515fJosh Gaoelse:
4730a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    STRING_TYPES = (type(""), type(unicode("")))
4740a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4750a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef isstring(obj):
4760a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for tp in STRING_TYPES:
4770a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        if isinstance(obj, tp):
4780a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            return 1
4790a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return 0
4800a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4810a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef _code(p, flags):
4820a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4830a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    flags = p.pattern.flags | flags
4840a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    code = []
4850a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4860a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # compile info block
4870a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    _compile_info(code, p, flags)
4880a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4890a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # compile the pattern
4900a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    _compile(code, p.data, flags)
4910a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4920a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    code.append(OPCODES[SUCCESS])
4930a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4940a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return code
4950a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4960a8c90248264a8b26970b4473770bcc3df8515fJosh Gaodef compile(p, flags=0):
4970a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # internal: convert pattern list to internal format
4980a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
4990a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if isstring(p):
5000a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        pattern = p
5010a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        p = sre_parse.parse(p, flags)
5020a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    else:
5030a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        pattern = None
5040a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5050a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    code = _code(p, flags)
5060a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5070a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # print code
5080a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5090a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # XXX: <fl> get rid of this limitation!
5100a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    if p.pattern.groups > 100:
5110a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        raise AssertionError(
5120a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            "sorry, but this version only supports 100 named groups"
5130a8c90248264a8b26970b4473770bcc3df8515fJosh Gao            )
5140a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5150a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    # map in either direction
5160a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    groupindex = p.pattern.groupdict
5170a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    indexgroup = [None] * p.pattern.groups
5180a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    for k, i in groupindex.items():
5190a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        indexgroup[i] = k
5200a8c90248264a8b26970b4473770bcc3df8515fJosh Gao
5210a8c90248264a8b26970b4473770bcc3df8515fJosh Gao    return _sre.compile(
5220a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        pattern, flags | p.pattern.flags, code,
5230a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        p.pattern.groups-1,
5240a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        groupindex, indexgroup
5250a8c90248264a8b26970b4473770bcc3df8515fJosh Gao        )
526