| 1 # 2 # Secret Labs' Regular Expression Engine 3 # 4 # various symbols used by the regular expression engine. 5 # run this script to update the _sre include files! 6 # 7 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. 8 # 9 # See the sre.py file for information on usage and redistribution. 10 # 11 12 """Internal support module for sre""" 13 14 # update when constants are added or removed 15 16 MAGIC = 20031017 17 18 from _sre import MAXREPEAT 19 20 # SRE standard exception (access as sre.error) 21 # should this really be here? 22 23 class error(Exception): 24 pass 25 26 # operators 27 28 FAILURE = "failure" 29 SUCCESS = "success" 30 31 ANY = "any" 32 ANY_ALL = "any_all" 33 ASSERT = "assert" 34 ASSERT_NOT = "assert_not" 35 AT = "at" 36 BIGCHARSET = "bigcharset" 37 BRANCH = "branch" 38 CALL = "call" 39 CATEGORY = "category" 40 CHARSET = "charset" 41 GROUPREF = "groupref" 42 GROUPREF_IGNORE = "groupref_ignore" 43 GROUPREF_EXISTS = "groupref_exists" 44 IN = "in" 45 IN_IGNORE = "in_ignore" 46 INFO = "info" 47 JUMP = "jump" 48 LITERAL = "literal" 49 LITERAL_IGNORE = "literal_ignore" 50 MARK = "mark" 51 MAX_REPEAT = "max_repeat" 52 MAX_UNTIL = "max_until" 53 MIN_REPEAT = "min_repeat" 54 MIN_UNTIL = "min_until" 55 NEGATE = "negate" 56 NOT_LITERAL = "not_literal" 57 NOT_LITERAL_IGNORE = "not_literal_ignore" 58 RANGE = "range" 59 REPEAT = "repeat" 60 REPEAT_ONE = "repeat_one" 61 SUBPATTERN = "subpattern" 62 MIN_REPEAT_ONE = "min_repeat_one" 63 64 # positions 65 AT_BEGINNING = "at_beginning" 66 AT_BEGINNING_LINE = "at_beginning_line" 67 AT_BEGINNING_STRING = "at_beginning_string" 68 AT_BOUNDARY = "at_boundary" 69 AT_NON_BOUNDARY = "at_non_boundary" 70 AT_END = "at_end" 71 AT_END_LINE = "at_end_line" 72 AT_END_STRING = "at_end_string" 73 AT_LOC_BOUNDARY = "at_loc_boundary" 74 AT_LOC_NON_BOUNDARY = "at_loc_non_boundary" 75 AT_UNI_BOUNDARY = "at_uni_boundary" 76 AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" 77 78 # categories 79 CATEGORY_DIGIT = "category_digit" 80 CATEGORY_NOT_DIGIT = "category_not_digit" 81 CATEGORY_SPACE = "category_space" 82 CATEGORY_NOT_SPACE = "category_not_space" 83 CATEGORY_WORD = "category_word" 84 CATEGORY_NOT_WORD = "category_not_word" 85 CATEGORY_LINEBREAK = "category_linebreak" 86 CATEGORY_NOT_LINEBREAK = "category_not_linebreak" 87 CATEGORY_LOC_WORD = "category_loc_word" 88 CATEGORY_LOC_NOT_WORD = "category_loc_not_word" 89 CATEGORY_UNI_DIGIT = "category_uni_digit" 90 CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit" 91 CATEGORY_UNI_SPACE = "category_uni_space" 92 CATEGORY_UNI_NOT_SPACE = "category_uni_not_space" 93 CATEGORY_UNI_WORD = "category_uni_word" 94 CATEGORY_UNI_NOT_WORD = "category_uni_not_word" 95 CATEGORY_UNI_LINEBREAK = "category_uni_linebreak" 96 CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak" 97 98 OPCODES = [ 99 100 # failure=0 success=1 (just because it looks better that way :-) 101 FAILURE, SUCCESS, 102 103 ANY, ANY_ALL, 104 ASSERT, ASSERT_NOT, 105 AT, 106 BRANCH, 107 CALL, 108 CATEGORY, 109 CHARSET, BIGCHARSET, 110 GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE, 111 IN, IN_IGNORE, 112 INFO, 113 JUMP, 114 LITERAL, LITERAL_IGNORE, 115 MARK, 116 MAX_UNTIL, 117 MIN_UNTIL, 118 NOT_LITERAL, NOT_LITERAL_IGNORE, 119 NEGATE, 120 RANGE, 121 REPEAT, 122 REPEAT_ONE, 123 SUBPATTERN, 124 MIN_REPEAT_ONE 125 126 ] 127 128 ATCODES = [ 129 AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY, 130 AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING, 131 AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY, 132 AT_UNI_NON_BOUNDARY 133 ] 134 135 CHCODES = [ 136 CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE, 137 CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD, 138 CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD, 139 CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT, 140 CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD, 141 CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK, 142 CATEGORY_UNI_NOT_LINEBREAK 143 ] 144 145 def makedict(list): 146 d = {} 147 i = 0 148 for item in list: 149 d[item] = i 150 i = i + 1 151 return d 152 153 OPCODES = makedict(OPCODES) 154 ATCODES = makedict(ATCODES) 155 CHCODES = makedict(CHCODES) 156 157 # replacement operations for "ignore case" mode 158 OP_IGNORE = { 159 GROUPREF: GROUPREF_IGNORE, 160 IN: IN_IGNORE, 161 LITERAL: LITERAL_IGNORE, 162 NOT_LITERAL: NOT_LITERAL_IGNORE 163 } 164 165 AT_MULTILINE = { 166 AT_BEGINNING: AT_BEGINNING_LINE, 167 AT_END: AT_END_LINE 168 } 169 170 AT_LOCALE = { 171 AT_BOUNDARY: AT_LOC_BOUNDARY, 172 AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY 173 } 174 175 AT_UNICODE = { 176 AT_BOUNDARY: AT_UNI_BOUNDARY, 177 AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY 178 } 179 180 CH_LOCALE = { 181 CATEGORY_DIGIT: CATEGORY_DIGIT, 182 CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT, 183 CATEGORY_SPACE: CATEGORY_SPACE, 184 CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE, 185 CATEGORY_WORD: CATEGORY_LOC_WORD, 186 CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD, 187 CATEGORY_LINEBREAK: CATEGORY_LINEBREAK, 188 CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK 189 } 190 191 CH_UNICODE = { 192 CATEGORY_DIGIT: CATEGORY_UNI_DIGIT, 193 CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT, 194 CATEGORY_SPACE: CATEGORY_UNI_SPACE, 195 CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE, 196 CATEGORY_WORD: CATEGORY_UNI_WORD, 197 CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD, 198 CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK, 199 CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK 200 } 201 202 # flags 203 SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking) 204 SRE_FLAG_IGNORECASE = 2 # case insensitive 205 SRE_FLAG_LOCALE = 4 # honour system locale 206 SRE_FLAG_MULTILINE = 8 # treat target as multiline string 207 SRE_FLAG_DOTALL = 16 # treat target as a single string 208 SRE_FLAG_UNICODE = 32 # use unicode "locale" 209 SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments 210 SRE_FLAG_DEBUG = 128 # debugging 211 SRE_FLAG_ASCII = 256 # use ascii "locale" 212 213 # flags for INFO primitive 214 SRE_INFO_PREFIX = 1 # has prefix 215 SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix) 216 SRE_INFO_CHARSET = 4 # pattern starts with character from given set 217 218 if __name__ == "__main__": 219 def dump(f, d, prefix): 220 items = sorted(d.items(), key=lambda a: a[1]) 221 for k, v in items: 222 f.write("#define %s_%s %s\n" % (prefix, k.upper(), v)) 223 f = open("sre_constants.h", "w") 224 f.write("""\ 225 /* 226 * Secret Labs' Regular Expression Engine 227 * 228 * regular expression matching engine 229 * 230 * NOTE: This file is generated by sre_constants.py. If you need 231 * to change anything in here, edit sre_constants.py and run it. 232 * 233 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 234 * 235 * See the _sre.c file for information on usage and redistribution. 236 */ 237 238 """) 239 240 f.write("#define SRE_MAGIC %d\n" % MAGIC) 241 242 dump(f, OPCODES, "SRE_OP") 243 dump(f, ATCODES, "SRE") 244 dump(f, CHCODES, "SRE") 245 246 f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE) 247 f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE) 248 f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE) 249 f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE) 250 f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL) 251 f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE) 252 f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE) 253 254 f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX) 255 f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL) 256 f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET) 257 258 f.close() 259 print("done") |