# sre_constants.py revision 0de65807e6bdc5254f5a7e99b2f39adeea6b883b
#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

# update when constants are added or removed

MAGIC = 20010115

# max code word in this release

MAXREPEAT = 65535

# SRE standard exception (access as sre.error)
# should this really be here?

class error(Exception):
    pass

# operators

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"

# positions
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"

# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"

# The position of a symbol in each list below becomes its numeric code
# (see makedict), so the order matters.
# NOTE(review): MAX_REPEAT and MIN_REPEAT are defined above but are not
# listed in OPCODES, so they get no numeric code -- presumably on
# purpose; confirm before "fixing".
OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET,
    GROUPREF, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN

]

ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING
]

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]

def makedict(list):
    """Return a dictionary mapping each item of 'list' to its index.

    Indices start at 0 and follow the order of the sequence.  If an
    item occurs more than once, the index of its last occurrence wins.
    """
    # the parameter name shadows the builtin, but it is part of the
    # existing signature, so it is left alone
    d = {}
    i = 0
    for item in list:
        d[item] = i
        i = i + 1
    return d

# from here on, the three tables map symbol -> numeric code
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

# replacement positions for multiline mode
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

# replacement categories for locale mode; only the word categories
# change, every other category maps to itself
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

# replacement categories for unicode mode
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode locale
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
SRE_FLAG_DEBUG = 128 # debugging

# flags for INFO primitive
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set

# export every module-level name that does not start with an underscore.
# the key list is copied up front: the namespace changes while we loop
# (the loop variable is bound in it), and keys() is not guaranteed to be
# an indexable list on every interpreter.
__all__ = []
for _name in list(locals().keys()):
    if _name[:1] != "_":
        __all__.append(_name)
del _name

if __name__ == "__main__":
    def dump(f, d, prefix):
        """Write one '#define <prefix>_<NAME> <code>' line per entry of d.

        f is a writable file object, d maps symbol names to numeric
        codes, and the lines are written in ascending code order.
        """
        # sort on (code, name) pairs; codes are unique within a table,
        # so this orders by code exactly like a compare-on-value sort
        pairs = []
        for k, v in d.items():
            pairs.append((v, k))
        pairs.sort()
        for v, k in pairs:
            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
    f = open("sre_constants.h", "w")
    try:
        f.write("""\
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * NOTE: This file is generated by sre_constants.py. If you need
 * to change anything in here, edit sre_constants.py and run it.
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

""")

        f.write("#define SRE_MAGIC %d\n" % MAGIC)

        dump(f, OPCODES, "SRE_OP")
        dump(f, ATCODES, "SRE")
        dump(f, CHCODES, "SRE")

        f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
        f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
        f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
        f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
        f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
        f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
        f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)

        f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
        f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
        f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)
    finally:
        # release the file handle even if one of the writes fails
        f.close()
    print("done")