# sre_constants.py revision 72b82ba16dea929b3fa9db5208b2353e8449c2d5
#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
# Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 2.0 integration and
# other compatibility work.
#

# should this really be here?

class error(Exception):
    """Exception type defined by this module."""
    pass

# operators

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
INDEX = "index"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_REPEAT_ONE = "max_repeat_one"
MIN_REPEAT = "min_repeat"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"

# positions
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"

# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"

# NOTE: the position of each name in the three lists below becomes its
# numeric code (see makedict), and those codes are written out as
# #defines by the script at the bottom of this file.  Do not reorder.

OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET,
    GROUPREF, GROUPREF_IGNORE,
    INDEX,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_REPEAT,
    MAX_REPEAT_ONE,
    MIN_REPEAT,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT

]

ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE
]

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]

def makedict(list):
    """Return a dict mapping each item of *list* to its index.

    If an item occurs more than once, the index of its last occurrence
    is stored.  An empty sequence yields an empty dict.

    The parameter name shadows the builtin ``list``; it is kept so that
    existing callers are unaffected.
    """
    d = {}
    for i, item in enumerate(list):
        d[item] = i
    return d

# from here on the three names refer to name -> numeric code dicts
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags
SRE_FLAG_TEMPLATE = 1  # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2  # case insensitive
SRE_FLAG_LOCALE = 4  # honour system locale
SRE_FLAG_MULTILINE = 8  # treat target as multiline string
SRE_FLAG_DOTALL = 16  # treat target as a single string
SRE_FLAG_UNICODE = 32  # use unicode locale
SRE_FLAG_VERBOSE = 64  # ignore whitespace and comments

# flags for INFO primitive
SRE_INFO_PREFIX = 1  # has prefix
SRE_INFO_LITERAL = 2  # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4  # pattern starts with character from given set

if __name__ == "__main__":
    def dump(f, d, prefix):
        """Write one "#define <prefix>_<NAME> <code>" line per entry of d.

        Entries are written in ascending order of their numeric code;
        names are upper-cased.
        """
        # sorted(key=...) and str.upper() behave the same on Python 2
        # and 3; the cmp-based list.sort() and string.upper() they
        # replace do not exist on Python 3.
        for k, v in sorted(d.items(), key=lambda item: item[1]):
            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))

    # the with-block closes the header file even if a write fails
    with open("sre_constants.h", "w") as f:
        f.write("""\
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * NOTE: This file is generated by sre_constants.py. If you need
 * to change anything in here, edit sre_constants.py and run it.
 *
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

""")

        dump(f, OPCODES, "SRE_OP")
        dump(f, ATCODES, "SRE")
        dump(f, CHCODES, "SRE")

        f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
        f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
        f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
        f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
        f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
        f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
        f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)

        f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
        f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
        f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)

    # call form prints the same text on Python 2 and Python 3
    print("done")