# -----------------------------------------------------------------------------
# cpp.py
#
# Author:  David Beazley (http://www.dabeaz.com)
# Copyright (C) 2007
# All rights reserved
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators

import copy
import os.path
import re
import sys
import time

# Some Python 3 compatibility shims
if sys.version_info.major < 3:
    STRING_TYPES = (str, unicode)
else:
    STRING_TYPES = str
    xrange = range

# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions.   These tokens are enough to get
# a basic preprocessor working.   Other modules may import these if they want
#
# NOTE: for the function rules below the docstring IS the token regex that the
# lexer generator reads, so it must stay the first statement of each function
# and must not be replaced by descriptive text.  Descriptions go in comments.
# -----------------------------------------------------------------------------

tokens = (
    'CPP_ID', 'CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS',
    'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND', 'CPP_DPOUND'
)

literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""

# Whitespace.  Newlines inside the run advance the lexer's line counter.
def t_CPP_WS(t):
    r'\s+'
    t.lexer.lineno += t.value.count("\n")
    return t

t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'

# Integer literal (decimal or hex, with optional u/l suffixes)
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    return t

t_CPP_INTEGER = CPP_INTEGER

# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal.  An escaped newline may occur inside, so count lines.
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    t.lexer.lineno += t.value.count("\n")
    return t

# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    t.lexer.lineno += t.value.count("\n")
    return t

# Block comment.  Re-emitted as whitespace so that later stages never see
# comment text: one '\n' per newline the comment spanned, or a single space
# when it was entirely on one line.
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    ncr = t.value.count("\n")
    t.lexer.lineno += ncr
    t.type = 'CPP_WS'
    t.value = '\n' * ncr if ncr else ' '
    return t

# Line comment.  Re-emitted as a single '\n' whitespace token.
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # The match swallows the terminating newline (if the comment was not ended
    # by end-of-input), so the line counter has to advance here just like it
    # does for every other rule that consumes a newline.  Count BEFORE the
    # value is overwritten: at end-of-input there is no real newline.
    t.lexer.lineno += t.value.count("\n")
    t.type = 'CPP_WS'
    t.value = '\n'
    return t

# Any character no rule matches is passed through as a one-character token
# whose type and value are both that character.
def t_error(t):
    t.type = t.value[0]
    t.value = t.value[0]
    t.lexer.skip(1)
    return t
# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
#     ??=    #
#     ??/    \
#     ??'    ^
#     ??(    [
#     ??)    ]
#     ??!    |
#     ??<    {
#     ??>    }
#     ??-    ~
# -----------------------------------------------------------------------------

_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
_trigraph_rep = {
    '=': '#',
    '/': '\\',
    "'": '^',
    '(': '[',
    ')': ']',
    '!': '|',
    '<': '{',
    '>': '}',
    '-': '~',
}


def trigraph(input):
    """Return *input* with every trigraph sequence replaced.

    Only the nine sequences listed in ``_trigraph_rep`` are recognised; any
    other text (including a bare ``??``) is returned unchanged.
    """
    # The last character of the three-character match selects the replacement.
    return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]], input)

# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
#    .name      - Macro name (string)
#    .value     - Macro value (a list of tokens)
#    .arglist   - List of argument names
#    .variadic  - Boolean indicating whether or not variadic macro
#    .vararg    - Name of the variadic parameter (None if not variadic)
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------

class Macro(object):
    """Record describing one preprocessor macro.

    Parameters:
        name:      macro name.
        value:     replacement token list.
        arglist:   list of parameter names, or None for an object-like macro.
        variadic:  true when the last entry of *arglist* is the variadic
                   parameter.
    """

    def __init__(self, name, value, arglist=None, variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        # Always define the attribute so that reading ``macro.vararg`` on a
        # non-variadic macro yields None instead of raising AttributeError.
        self.vararg = arglist[-1] if variadic else None
        self.source = None
# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor.  Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------

class Preprocessor(object):
    """C-style preprocessor operating on tokens produced by a lexer object.

    The lexer must offer ``input(text)``, ``token()`` and ``clone()``; those
    are the only lexer methods this class calls.  Tokens must carry ``type``,
    ``value`` and ``lineno`` attributes.
    """

    def __init__(self, lexer=None):
        """Create a preprocessor around *lexer*.

        When *lexer* is None the most recently built PLY lexer
        (``ply.lex.lexer``) is used.
        """
        if lexer is None:
            # ``lex`` is only bound at module level when this file is run as a
            # script, so import it here to avoid a NameError on plain import.
            import ply.lex as lex
            lexer = lex.lexer
        self.lexer = lexer
        self.macros = {}
        self.path = []
        self.temp_path = []
        # Defaults for attributes normally set by parsegen()/parse(), so that
        # the expansion helpers can report errors before any parse started.
        self.source = ""
        self.ignore = {}

        # Probe the lexer for selected tokens
        self.lexprobe()

        tm = time.localtime()
        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y", tm))
        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S", tm))
        self.parser = None

    def tokenize(self, text):
        """Utility function: lex *text* and return the list of all tokens."""
        tokens = []
        self.lexer.input(text)
        while True:
            tok = self.lexer.token()
            if not tok:
                break
            tokens.append(tok)
        return tokens

    def error(self, file, line, msg):
        """Report a preprocessor error/warning by printing ``file:line msg``."""
        print("%s:%d %s" % (file, line, msg))

    def lexprobe(self):
        """Discover which token types the lexer uses for important symbols.

        Feeds small sample inputs to the lexer and records the resulting token
        types in ``t_ID``, ``t_INTEGER`` (plus ``t_INTEGER_TYPE``, the Python
        type of an integer token's value), ``t_STRING``, ``t_SPACE``,
        ``t_NEWLINE`` and ``t_WS``.  A failed probe prints a message; for the
        identifier, integer and string probes the attribute is then left unset.
        """
        # Determine the token type for identifiers
        self.lexer.input("identifier")
        tok = self.lexer.token()
        if not tok or tok.value != "identifier":
            print("Couldn't determine identifier type")
        else:
            self.t_ID = tok.type

        # Determine the token type for integers
        self.lexer.input("12345")
        tok = self.lexer.token()
        if not tok or int(tok.value) != 12345:
            print("Couldn't determine integer type")
        else:
            self.t_INTEGER = tok.type
            self.t_INTEGER_TYPE = type(tok.value)

        # Determine the token type for strings enclosed in double quotes
        self.lexer.input("\"filename\"")
        tok = self.lexer.token()
        if not tok or tok.value != "\"filename\"":
            print("Couldn't determine string type")
        else:
            self.t_STRING = tok.type

        # Determine the token type for whitespace--if any
        self.lexer.input(" ")
        tok = self.lexer.token()
        if not tok or tok.value != " ":
            self.t_SPACE = None
        else:
            self.t_SPACE = tok.type

        # Determine the token type for newlines
        self.lexer.input("\n")
        tok = self.lexer.token()
        if not tok or tok.value != "\n":
            self.t_NEWLINE = None
            print("Couldn't determine token for newlines")
        else:
            self.t_NEWLINE = tok.type

        self.t_WS = (self.t_SPACE, self.t_NEWLINE)

        # Check for other characters used by the preprocessor
        chars = ['<', '>', '#', '##', '\\', '(', ')', ',', '.']
        for c in chars:
            self.lexer.input(c)
            tok = self.lexer.token()
            if not tok or tok.value != c:
                print("Unable to lex '%s' required for preprocessor" % c)

    def add_path(self, path):
        """Add a directory to the #include search path."""
        self.path.append(path)

    def group_lines(self, input):
        """Yield the tokens of *input* one logical line at a time.

        Trailing whitespace is removed from each physical line and any line
        ending with a backslash is joined with the following line (the joined
        line is replaced by an empty one, so the number of newlines is kept).
        Each yielded list ends with the whitespace token that contains the
        newline, except possibly the last one.
        """
        lexer = self.lexer.clone()
        lines = [x.rstrip() for x in input.splitlines()]
        for i in range(len(lines)):
            j = i + 1
            while lines[i].endswith('\\') and (j < len(lines)):
                lines[i] = lines[i][:-1] + lines[j]
                lines[j] = ""
                j += 1

        input = "\n".join(lines)
        lexer.input(input)
        lexer.lineno = 1

        current_line = []
        while True:
            tok = lexer.token()
            if not tok:
                break
            current_line.append(tok)
            if tok.type in self.t_WS and '\n' in tok.value:
                yield current_line
                current_line = []

        if current_line:
            yield current_line

    def tokenstrip(self, tokens):
        """Remove leading/trailing whitespace tokens from *tokens* in place.

        Returns the same (mutated) list.
        """
        i = 0
        while i < len(tokens) and tokens[i].type in self.t_WS:
            i += 1
        del tokens[:i]
        i = len(tokens) - 1
        while i >= 0 and tokens[i].type in self.t_WS:
            i -= 1
        del tokens[i + 1:]
        return tokens

    def collect_args(self, tokenlist):
        """Collect comma separated macro arguments from *tokenlist*.

        The arguments must be enclosed in parentheses.  Returns a tuple
        ``(tokencount, args, positions)`` where *tokencount* is the number of
        tokens consumed, *args* is a list of arguments (each a whitespace
        stripped token list) and *positions* holds the starting index of each
        argument followed by the index of the closing parenthesis.  Nested
        parentheses and the commas inside them do not start new arguments.

        On a missing ``(`` or ``)`` an error is reported and ``(0, [], [])``
        is returned.
        """
        args = []
        positions = []
        current_arg = []
        nesting = 1
        tokenlen = len(tokenlist)

        # Search for the opening '('.
        i = 0
        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
            i += 1

        if (i < tokenlen) and (tokenlist[i].value == '('):
            positions.append(i + 1)
        else:
            self.error(self.source, tokenlist[0].lineno, "Missing '(' in macro arguments")
            return 0, [], []

        i += 1

        while i < tokenlen:
            t = tokenlist[i]
            if t.value == '(':
                current_arg.append(t)
                nesting += 1
            elif t.value == ')':
                nesting -= 1
                if nesting == 0:
                    if current_arg:
                        args.append(self.tokenstrip(current_arg))
                        positions.append(i)
                    return i + 1, args, positions
                current_arg.append(t)
            elif t.value == ',' and nesting == 1:
                args.append(self.tokenstrip(current_arg))
                positions.append(i + 1)
                current_arg = []
            else:
                current_arg.append(t)
            i += 1

        # Missing end argument
        self.error(self.source, tokenlist[-1].lineno, "Missing ')' in macro arguments")
        return 0, [], []

    def macro_prescan(self, macro):
        """Pre-compute the patch points of a function-like macro.

        Scans ``macro.value`` and fills three lists on *macro*:

        * ``patch``            - ``(kind, argnum, index)`` with kind ``'e'``
                                 (argument is macro-expanded first) or ``'c'``
                                 (operand of ``##``, inserted unexpanded),
                                 sorted by index in descending order;
        * ``str_patch``        - ``(argnum, index)`` for ``#arg`` conversions;
        * ``var_comma_patch``  - indices of a ``,`` that precedes
                                 ``## <vararg>`` in a variadic macro.

        ``macro.value`` is modified: ``#`` tokens and the ``##`` preceding an
        argument are deleted.
        """
        macro.patch = []                # Standard macro arguments
        macro.str_patch = []            # String conversion expansion
        macro.var_comma_patch = []      # Variadic macro comma patch
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i - 1].value == '#':
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    del macro.value[i - 1]
                    macro.str_patch.append((argnum, i - 1))
                    # A token was deleted, so index i already is the next one.
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i - 1].value == '##'):
                    macro.patch.append(('c', argnum, i - 1))
                    del macro.value[i - 1]
                    continue
                elif ((i + 1) < len(macro.value) and macro.value[i + 1].value == '##'):
                    macro.patch.append(('c', argnum, i))
                    i += 1
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e', argnum, i))
            elif macro.value[i].value == '##':
                if macro.variadic and (i > 0) and (macro.value[i - 1].value == ',') and \
                        ((i + 1) < len(macro.value)) and (macro.value[i + 1].type == self.t_ID) and \
                        (macro.value[i + 1].value == macro.vararg):
                    macro.var_comma_patch.append(i - 1)
            i += 1
        macro.patch.sort(key=lambda x: x[2], reverse=True)

    def macro_expand_args(self, macro, args):
        """Return the replacement token list of *macro* applied to *args*.

        *args* is a list of token lists, one per macro parameter.  The patch
        lists created by macro_prescan() are applied to a copy of
        ``macro.value``.
        """
        # Make a copy of the macro token sequence
        rep = [copy.copy(_x) for _x in macro.value]

        # Make string expansion patches.  These do not alter the length of the
        # replacement sequence
        str_expansion = {}
        for argnum, i in macro.str_patch:
            if argnum not in str_expansion:
                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\", "\\\\")
            rep[i] = copy.copy(rep[i])
            rep[i].value = str_expansion[argnum]

        # Make the variadic macro comma patch.  If the variadic macro argument
        # is empty, the comma in front of it is removed.
        comma_patch = False
        if macro.variadic and not args[-1]:
            for i in macro.var_comma_patch:
                rep[i] = None
                comma_patch = True

        # Make all other patches.  The order of these matters.  It is assumed
        # that the patch list has been sorted in reverse order of patch
        # location since replacements will cause the size of the replacement
        # sequence to expand from the patch point.
        expanded = {}
        for ptype, argnum, i in macro.patch:
            # Concatenation.  Argument is left unexpanded
            if ptype == 'c':
                rep[i:i + 1] = args[argnum]
            # Normal expansion.  Argument is macro expanded first
            elif ptype == 'e':
                if argnum not in expanded:
                    expanded[argnum] = self.expand_macros(args[argnum])
                rep[i:i + 1] = expanded[argnum]

        # Get rid of removed comma if necessary
        if comma_patch:
            rep = [_i for _i in rep if _i]

        return rep

    def expand_macros(self, tokens, expanded=None):
        """Perform macro expansion on *tokens* in place and return the list.

        *expanded* is a dictionary holding the names of macros currently being
        expanded; it prevents infinite recursion.  ``__LINE__`` identifiers
        are turned into integer tokens carrying their own line number.
        """
        if expanded is None:
            expanded = {}
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.type == self.t_ID:
                if t.value in self.macros and t.value not in expanded:
                    # Yes, we found a macro match
                    expanded[t.value] = True

                    m = self.macros[t.value]
                    if not m.arglist:
                        # A simple macro
                        ex = self.expand_macros([copy.copy(_x) for _x in m.value], expanded)
                        for e in ex:
                            e.lineno = t.lineno
                        tokens[i:i + 1] = ex
                        i += len(ex)
                    else:
                        # A macro with arguments
                        j = i + 1
                        while j < len(tokens) and tokens[j].type in self.t_WS:
                            j += 1
                        # Bounds check: the name may be the very last token.
                        if j < len(tokens) and tokens[j].value == '(':
                            tokcount, args, positions = self.collect_args(tokens[j:])
                            if not m.variadic and len(args) != len(m.arglist):
                                self.error(self.source, t.lineno, "Macro %s requires %d arguments" % (t.value, len(m.arglist)))
                                i = j + tokcount
                            elif m.variadic and len(args) < len(m.arglist) - 1:
                                if len(m.arglist) > 2:
                                    self.error(self.source, t.lineno, "Macro %s must have at least %d arguments" % (t.value, len(m.arglist) - 1))
                                else:
                                    self.error(self.source, t.lineno, "Macro %s must have at least %d argument" % (t.value, len(m.arglist) - 1))
                                i = j + tokcount
                            else:
                                if m.variadic:
                                    if len(args) == len(m.arglist) - 1:
                                        args.append([])
                                    else:
                                        # Everything from the start of the
                                        # variadic argument up to the closing
                                        # ')' becomes one argument.
                                        args[len(m.arglist) - 1] = tokens[j + positions[len(m.arglist) - 1]:j + tokcount - 1]
                                        del args[len(m.arglist):]

                                # Get macro replacement text
                                rep = self.macro_expand_args(m, args)
                                rep = self.expand_macros(rep, expanded)
                                for r in rep:
                                    r.lineno = t.lineno
                                tokens[i:j + tokcount] = rep
                                i += len(rep)
                        else:
                            # Not an invocation: just a word that happens to
                            # equal the name of a function-like macro.  Step
                            # over it, otherwise this loop never advances.
                            i += 1
                    del expanded[t.value]
                    continue
                elif t.value == '__LINE__':
                    t.type = self.t_INTEGER
                    t.value = self.t_INTEGER_TYPE(t.lineno)

            i += 1
        return tokens

    def evalexpr(self, tokens):
        """Evaluate the token sequence of an ``#if``/``#elif`` expression.

        ``defined X`` / ``defined(X)`` are replaced by 1 or 0, macros are
        expanded, remaining identifiers become 0, integer suffixes are
        stripped and ``&&``, ``||``, ``!`` are mapped to Python operators.
        If the resulting text cannot be evaluated an error is reported and 0
        is returned.
        """
        # Search for defined macros
        i = 0
        while i < len(tokens):
            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
                j = i + 1
                needparen = False
                result = "0"
                while j < len(tokens):
                    if tokens[j].type in self.t_WS:
                        j += 1
                        continue
                    elif tokens[j].type == self.t_ID:
                        if tokens[j].value in self.macros:
                            result = "1"
                        else:
                            result = "0"
                        if not needparen:
                            break
                    elif tokens[j].value == '(':
                        needparen = True
                    elif tokens[j].value == ')':
                        break
                    else:
                        self.error(self.source, tokens[i].lineno, "Malformed defined()")
                    j += 1
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE(result)
                del tokens[i + 1:j + 1]
            i += 1
        tokens = self.expand_macros(tokens)
        for i, t in enumerate(tokens):
            if t.type == self.t_ID:
                # Identifiers that survive expansion evaluate to 0.  A plain
                # "0" is used because a C-style "0L" is not valid Python 3.
                tokens[i] = copy.copy(t)
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE("0")
            elif t.type == self.t_INTEGER:
                tokens[i] = copy.copy(t)
                # Strip off any trailing suffixes
                tokens[i].value = str(tokens[i].value)
                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                    tokens[i].value = tokens[i].value[:-1]

        expr = "".join([str(x.value) for x in tokens])
        expr = expr.replace("&&", " and ")
        expr = expr.replace("||", " or ")
        expr = expr.replace("!", " not ")
        # The previous step also rewrote "!=" into " not ="; restore it.
        expr = expr.replace(" not =", " !=")
        try:
            # SECURITY NOTE(review): eval() runs text derived from the source
            # being preprocessed.  Do not feed untrusted input through here
            # without replacing this with a real expression evaluator.
            result = eval(expr)
        except Exception:
            lineno = tokens[0].lineno if tokens else 0
            self.error(self.source, lineno, "Couldn't evaluate expression")
            result = 0
        return result

    def parsegen(self, input, source=None):
        """Generator yielding the preprocessed tokens of the string *input*.

        *source* is the name used for ``__FILE__`` and in error messages.
        Ordinary text is collected and macro-expanded in chunks; a chunk is
        flushed before each active ``#define``, ``#include`` or ``#undef`` so
        that a directive only affects the text after it.
        """
        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)

        self.source = source
        chunk = []
        enable = True        # False while inside a conditional branch not taken
        iftrigger = False    # True once a branch of the current #if was taken
        ifstack = []         # Saved (enable, iftrigger) of enclosing conditionals

        for x in lines:
            for i, tok in enumerate(x):
                if tok.type not in self.t_WS:
                    break
            if tok.value == '#':
                # Preprocessor directive

                # insert necessary whitespace instead of eaten tokens
                for tok in x:
                    if tok.type in self.t_WS and '\n' in tok.value:
                        chunk.append(tok)

                dirtokens = self.tokenstrip(x[i + 1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        # The nested parse redefines __FILE__ and self.source;
                        # put both back afterwards.
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        if not args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        if args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:      # We only pay attention if outer "if" allows this
                            if enable:          # If already true, we flip enable False
                                enable = False
                            elif not iftrigger:  # If False, but not triggered yet, we'll check expression
                                result = self.evalexpr(args)
                                if result:
                                    enable = True
                                    iftrigger = True
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #elif")

                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                enable = False
                            elif not iftrigger:
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #else")

                elif name == 'endif':
                    if ifstack:
                        enable, iftrigger = ifstack.pop()
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass

            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []

    def include(self, tokens):
        """Generator implementing ``#include``; *tokens* is the operand.

        ``<name>`` searches ``self.path``, then the current directory, then
        the directories of files currently being included; ``"name"`` searches
        in the opposite order.  Any other operand is macro-expanded first.
        Yields the tokens of the first file that can be read; prints a message
        when the operand is malformed or no file is found.
        """
        # Try to extract the filename and then process an include file
        if not tokens:
            return
        if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
            tokens = self.expand_macros(tokens)
            if not tokens:
                # The operand expanded to nothing.
                print("Malformed #include statement")
                return

        if tokens[0].value == '<':
            # Include <...>
            i = 1
            while i < len(tokens):
                if tokens[i].value == '>':
                    break
                i += 1
            else:
                print("Malformed #include <...>")
                return
            filename = "".join([x.value for x in tokens[1:i]])
            path = self.path + [""] + self.temp_path
        elif tokens[0].type == self.t_STRING:
            filename = tokens[0].value[1:-1]
            path = self.temp_path + [""] + self.path
        else:
            print("Malformed #include statement")
            return
        for p in path:
            iname = os.path.join(p, filename)
            # Only the file access is guarded: an unreadable candidate just
            # means "try the next directory".
            try:
                with open(iname, "r") as f:
                    data = f.read()
            except IOError:
                continue
            dname = os.path.dirname(iname)
            if dname:
                self.temp_path.insert(0, dname)
            try:
                for tok in self.parsegen(data, filename):
                    yield tok
            finally:
                # Undo the temporary search path even if the nested parse
                # fails or this generator is closed early.
                if dname:
                    del self.temp_path[0]
            break
        else:
            print("Couldn't find '%s'" % filename)

    def define(self, tokens):
        """Define a new macro.

        *tokens* is either the text following ``#define`` (a string, which is
        tokenized first) or the corresponding token list.  Malformed
        definitions print a message and define nothing.
        """
        if isinstance(tokens, STRING_TYPES):
            tokens = self.tokenize(tokens)

        linetok = tokens
        try:
            name = linetok[0]
            if len(linetok) > 1:
                mtype = linetok[1]
            else:
                mtype = None
            if not mtype:
                m = Macro(name.value, [])
                self.macros[name.value] = m
            elif mtype.type in self.t_WS:
                # A normal macro
                m = Macro(name.value, self.tokenstrip(linetok[2:]))
                self.macros[name.value] = m
            elif mtype.value == '(':
                # A macro with arguments
                tokcount, args, positions = self.collect_args(linetok[1:])
                variadic = False
                for a in args:
                    if variadic:
                        print("No more arguments may follow a variadic argument")
                        break
                    astr = "".join([str(_i.value) for _i in a])
                    if astr == "...":
                        variadic = True
                        a[0].type = self.t_ID
                        a[0].value = '__VA_ARGS__'
                        del a[1:]
                        continue
                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
                        variadic = True
                        del a[1:]
                        # If, for some reason, "." is part of the identifier,
                        # strip off the name for the purposes of macro
                        # expansion
                        if a[0].value[-3:] == '...':
                            a[0].value = a[0].value[:-3]
                        continue
                    if len(a) > 1 or a[0].type != self.t_ID:
                        print("Invalid macro argument")
                        break
                else:
                    # Only reached when every parameter was valid.
                    mvalue = self.tokenstrip(linetok[1 + tokcount:])
                    # Drop whitespace on either side of '##' so that the
                    # prescan sees the operands directly next to it.
                    i = 0
                    while i < len(mvalue):
                        if i + 1 < len(mvalue):
                            if mvalue[i].type in self.t_WS and mvalue[i + 1].value == '##':
                                del mvalue[i]
                                continue
                            elif mvalue[i].value == '##' and mvalue[i + 1].type in self.t_WS:
                                del mvalue[i + 1]
                        i += 1
                    m = Macro(name.value, mvalue, [x[0].value for x in args], variadic)
                    self.macro_prescan(m)
                    self.macros[name.value] = m
            else:
                print("Bad macro definition")
        except LookupError:
            print("Bad macro definition")

    def undef(self, tokens):
        """Undefine the macro named by the first token; unknown names are ignored."""
        name = tokens[0].value
        try:
            del self.macros[name]
        except LookupError:
            pass

    def parse(self, input, source=None, ignore=None):
        """Start preprocessing the text *input*; fetch results with token().

        *source* names the input for ``__FILE__`` and messages.  *ignore* is a
        container of token types that token() should skip; it defaults to an
        empty dict.
        """
        # A fresh container per call instead of a shared mutable default.
        self.ignore = {} if ignore is None else ignore
        self.parser = self.parsegen(input, source)

    def token(self):
        """Return the next token not listed in ``self.ignore``.

        Returns None at the end of the input, and also when no parse is in
        progress.
        """
        if self.parser is None:
            return None
        try:
            while True:
                tok = next(self.parser)
                if tok.type not in self.ignore:
                    return tok
        except StopIteration:
            self.parser = None
            return None


if __name__ == '__main__':
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    with open(sys.argv[1]) as f:
        text = f.read()

    p = Preprocessor(lexer)
    p.parse(text, sys.argv[1])
    while True:
        tok = p.token()
        if not tok:
            break
        print(p.source, tok)