1# -----------------------------------------------------------------------------
2# cpp.py
3#
4# Author:  David Beazley (http://www.dabeaz.com)
5# Copyright (C) 2007
6# All rights reserved
7#
8# This module implements an ANSI-C style lexical preprocessor for PLY.
9# -----------------------------------------------------------------------------
10from __future__ import generators
11
12import sys
13
14# Some Python 3 compatibility shims
if sys.version_info[0] >= 3:
    # Python 3: all text is 'str', and the lazy integer range is plain 'range'
    STRING_TYPES = str
    xrange = range
else:
    # Python 2: both byte strings and unicode strings count as text
    STRING_TYPES = (str, unicode)
20
21# -----------------------------------------------------------------------------
22# Default preprocessor lexer definitions.   These tokens are enough to get
23# a basic preprocessor working.   Other modules may import these if they want
24# -----------------------------------------------------------------------------
25
# Names of the token types produced by the t_CPP_* rules defined in this module.
tokens = (
   'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND'
)

# Single characters made available to the lexer as literal tokens
# (operators, brackets, punctuation, backslash and both quote characters).
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
31
32# Whitespace
def t_CPP_WS(t):
    r'\s+'
    # The raw string above is the rule's pattern.  Whitespace is kept as a
    # token; the lexer's line counter advances by the newlines it contains.
    newline_count = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + newline_count
    return t
37
# '#' (directive introducer / stringizing operator) and '##' (token pasting)
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier: a letter or underscore followed by any number of word characters
t_CPP_ID = r'[A-Za-z_][\w_]*'
43
44# Integer literal
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    # The raw string above is the rule's pattern, not documentation: a
    # hexadecimal (0x/0X prefix) or decimal number with an optional
    # u/l suffix combination.  The token is returned unmodified.
    return t

# Make the rule available under the t_ prefixed name as well
t_CPP_INTEGER = CPP_INTEGER
50
51# Floating literal
# Either digits with a fractional part and an optional exponent, or digits
# with a mandatory exponent; both forms accept an optional l/L/f/F suffix.
# NOTE(review): the spaces around '|' are only insignificant if the lexer
# compiles patterns in verbose mode -- confirm against the lexer in use.
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
53
54# String literal
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    # The pattern admits backslash-newline inside the literal, so the token
    # may span lines; account for every newline it contains.
    lexer = t.lexer
    lexer.lineno = lexer.lineno + t.value.count("\n")
    return t
59
60# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    # Same bookkeeping as for string literals: count the newlines that the
    # matched character constant contains.
    lexer = t.lexer
    lexer.lineno = lexer.lineno + t.value.count("\n")
    return t
65
66# Comment
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    newline_count = t.value.count("\n")
    t.lexer.lineno += newline_count
    # A block comment is handed on as whitespace: one newline per line the
    # comment spanned, or a single space if it was on one line.
    t.type = 'CPP_WS'
    if newline_count:
        t.value = '\n' * newline_count
    else:
        t.value = ' '
    return t
74
75# Line comment
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # The pattern swallows the terminating newline when there is one, so the
    # line counter has to be advanced here like in the other rules; otherwise
    # every token after a '//' comment reports a line number that is too low.
    t.lexer.lineno += t.value.count("\n")
    # replace the comment with a single '\n'
    t.type = 'CPP_WS'
    t.value = '\n'
    return t
81
def t_error(t):
    # Unrecognised input: hand the offending character on as a one-character
    # token whose type is the character itself, and step past it.
    first_char = t.value[0]
    t.type = first_char
    t.value = first_char
    t.lexer.skip(1)
    return t
87
88import re
89import copy
90import time
91import os.path
92
93# -----------------------------------------------------------------------------
94# trigraph()
95#
96# Given an input string, this function replaces all trigraph sequences.
97# The following mapping is used:
98#
99#     ??=    #
100#     ??/    \
101#     ??'    ^
102#     ??(    [
103#     ??)    ]
104#     ??!    |
105#     ??<    {
106#     ??>    }
107#     ??-    ~
108# -----------------------------------------------------------------------------
109
# Matches '??' followed by one of the nine trigraph selector characters
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')

# Selector character (third character of the trigraph) -> replacement
_trigraph_rep = {
    '=': '#',
    '/': '\\',
    "'": '^',
    '(': '[',
    ')': ']',
    '!': '|',
    '<': '{',
    '>': '}',
    '-': '~',
}

def trigraph(input):
    """Return ``input`` with every trigraph sequence replaced by its character."""
    def _replacement(match):
        # The last character of the matched text selects the replacement
        selector = match.group()[-1]
        return _trigraph_rep[selector]
    return _trigraph_pat.sub(_replacement, input)
125
126# ------------------------------------------------------------------
127# Macro object
128#
129# This object holds information about preprocessor macros
130#
131#    .name      - Macro name (string)
132#    .value     - Macro value (a list of tokens)
133#    .arglist   - List of argument names
134#    .variadic  - Boolean indicating whether or not variadic macro
135#    .vararg    - Name of the variadic parameter
136#
137# When a macro is created, the macro replacement token sequence is
138# pre-scanned and used to create patch lists that are later used
139# during macro expansion
140# ------------------------------------------------------------------
141
class Macro(object):
    """Record describing one preprocessor macro.

    name      -- macro name
    value     -- replacement token list
    arglist   -- list of parameter names, or None when none were given
    variadic  -- true when the last parameter is variadic
    vararg    -- name of the variadic parameter (only set when variadic)
    source    -- initialised to None
    """
    def __init__(self,name,value,arglist=None,variadic=False):
        self.source = None
        self.name, self.value = name, value
        self.arglist, self.variadic = arglist, variadic
        if variadic:
            # By construction the variadic parameter is the last one listed
            self.vararg = arglist[-1]
151
152# ------------------------------------------------------------------
153# Preprocessor object
154#
155# Object representing a preprocessor.  Contains macro definitions,
156# include directories, and other information
157# ------------------------------------------------------------------
158
159class Preprocessor(object):
160    def __init__(self,lexer=None):
161        if lexer is None:
162            lexer = lex.lexer
163        self.lexer = lexer
164        self.macros = { }
165        self.path = []
166        self.temp_path = []
167
168        # Probe the lexer for selected tokens
169        self.lexprobe()
170
171        tm = time.localtime()
172        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
173        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
174        self.parser = None
175
176    # -----------------------------------------------------------------------------
177    # tokenize()
178    #
179    # Utility function. Given a string of text, tokenize into a list of tokens
180    # -----------------------------------------------------------------------------
181
182    def tokenize(self,text):
183        tokens = []
184        self.lexer.input(text)
185        while True:
186            tok = self.lexer.token()
187            if not tok: break
188            tokens.append(tok)
189        return tokens
190
191    # ---------------------------------------------------------------------
192    # error()
193    #
194    # Report a preprocessor error/warning of some kind
195    # ----------------------------------------------------------------------
196
197    def error(self,file,line,msg):
198        print("%s:%d %s" % (file,line,msg))
199
200    # ----------------------------------------------------------------------
201    # lexprobe()
202    #
203    # This method probes the preprocessor lexer object to discover
204    # the token types of symbols that are important to the preprocessor.
205    # If this works right, the preprocessor will simply "work"
206    # with any suitable lexer regardless of how tokens have been named.
207    # ----------------------------------------------------------------------
208
209    def lexprobe(self):
210
211        # Determine the token type for identifiers
212        self.lexer.input("identifier")
213        tok = self.lexer.token()
214        if not tok or tok.value != "identifier":
215            print("Couldn't determine identifier type")
216        else:
217            self.t_ID = tok.type
218
219        # Determine the token type for integers
220        self.lexer.input("12345")
221        tok = self.lexer.token()
222        if not tok or int(tok.value) != 12345:
223            print("Couldn't determine integer type")
224        else:
225            self.t_INTEGER = tok.type
226            self.t_INTEGER_TYPE = type(tok.value)
227
228        # Determine the token type for strings enclosed in double quotes
229        self.lexer.input("\"filename\"")
230        tok = self.lexer.token()
231        if not tok or tok.value != "\"filename\"":
232            print("Couldn't determine string type")
233        else:
234            self.t_STRING = tok.type
235
236        # Determine the token type for whitespace--if any
237        self.lexer.input("  ")
238        tok = self.lexer.token()
239        if not tok or tok.value != "  ":
240            self.t_SPACE = None
241        else:
242            self.t_SPACE = tok.type
243
244        # Determine the token type for newlines
245        self.lexer.input("\n")
246        tok = self.lexer.token()
247        if not tok or tok.value != "\n":
248            self.t_NEWLINE = None
249            print("Couldn't determine token for newlines")
250        else:
251            self.t_NEWLINE = tok.type
252
253        self.t_WS = (self.t_SPACE, self.t_NEWLINE)
254
255        # Check for other characters used by the preprocessor
256        chars = [ '<','>','#','##','\\','(',')',',','.']
257        for c in chars:
258            self.lexer.input(c)
259            tok = self.lexer.token()
260            if not tok or tok.value != c:
261                print("Unable to lex '%s' required for preprocessor" % c)
262
263    # ----------------------------------------------------------------------
264    # add_path()
265    #
266    # Adds a search path to the preprocessor.
267    # ----------------------------------------------------------------------
268
269    def add_path(self,path):
270        self.path.append(path)
271
272    # ----------------------------------------------------------------------
273    # group_lines()
274    #
    # Given an input string, this function splits it into lines.  Trailing whitespace
    # is removed.   Any line ending with \ is grouped with the next line.  This
    # function forms the lowest level of the preprocessor---grouping the text into
    # a line-by-line format.
279    # ----------------------------------------------------------------------
280
281    def group_lines(self,input):
282        lex = self.lexer.clone()
283        lines = [x.rstrip() for x in input.splitlines()]
284        for i in xrange(len(lines)):
285            j = i+1
286            while lines[i].endswith('\\') and (j < len(lines)):
287                lines[i] = lines[i][:-1]+lines[j]
288                lines[j] = ""
289                j += 1
290
291        input = "\n".join(lines)
292        lex.input(input)
293        lex.lineno = 1
294
295        current_line = []
296        while True:
297            tok = lex.token()
298            if not tok:
299                break
300            current_line.append(tok)
301            if tok.type in self.t_WS and '\n' in tok.value:
302                yield current_line
303                current_line = []
304
305        if current_line:
306            yield current_line
307
308    # ----------------------------------------------------------------------
309    # tokenstrip()
310    #
311    # Remove leading/trailing whitespace tokens from a token list
312    # ----------------------------------------------------------------------
313
314    def tokenstrip(self,tokens):
315        i = 0
316        while i < len(tokens) and tokens[i].type in self.t_WS:
317            i += 1
318        del tokens[:i]
319        i = len(tokens)-1
320        while i >= 0 and tokens[i].type in self.t_WS:
321            i -= 1
322        del tokens[i+1:]
323        return tokens
324
325
326    # ----------------------------------------------------------------------
327    # collect_args()
328    #
329    # Collects comma separated arguments from a list of tokens.   The arguments
330    # must be enclosed in parenthesis.  Returns a tuple (tokencount,args,positions)
331    # where tokencount is the number of tokens consumed, args is a list of arguments,
332    # and positions is a list of integers containing the starting index of each
333    # argument.  Each argument is represented by a list of tokens.
334    #
335    # When collecting arguments, leading and trailing whitespace is removed
336    # from each argument.
337    #
338    # This function properly handles nested parenthesis and commas---these do not
339    # define new arguments.
340    # ----------------------------------------------------------------------
341
342    def collect_args(self,tokenlist):
343        args = []
344        positions = []
345        current_arg = []
346        nesting = 1
347        tokenlen = len(tokenlist)
348
349        # Search for the opening '('.
350        i = 0
351        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
352            i += 1
353
354        if (i < tokenlen) and (tokenlist[i].value == '('):
355            positions.append(i+1)
356        else:
357            self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
358            return 0, [], []
359
360        i += 1
361
362        while i < tokenlen:
363            t = tokenlist[i]
364            if t.value == '(':
365                current_arg.append(t)
366                nesting += 1
367            elif t.value == ')':
368                nesting -= 1
369                if nesting == 0:
370                    if current_arg:
371                        args.append(self.tokenstrip(current_arg))
372                        positions.append(i)
373                    return i+1,args,positions
374                current_arg.append(t)
375            elif t.value == ',' and nesting == 1:
376                args.append(self.tokenstrip(current_arg))
377                positions.append(i+1)
378                current_arg = []
379            else:
380                current_arg.append(t)
381            i += 1
382
383        # Missing end argument
384        self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
385        return 0, [],[]
386
387    # ----------------------------------------------------------------------
388    # macro_prescan()
389    #
390    # Examine the macro value (token sequence) and identify patch points
391    # This is used to speed up macro expansion later on---we'll know
392    # right away where to apply patches to the value to form the expansion
393    # ----------------------------------------------------------------------
394
    def macro_prescan(self,macro):
        """Scan macro.value once and record where arguments must be patched in.

        Adds three attributes to ``macro``:
          patch           -- list of (kind, argnum, index) tuples, sorted by
                             descending index; kind is 'c' for an argument
                             adjacent to '##' and 'e' for an ordinary one
          str_patch       -- list of (argnum, index) for '#arg' stringizing
          var_comma_patch -- indices of a ',' that precedes '## <vararg>'

        macro.value is modified in place: a '#' or '##' token directly in
        front of a parameter is deleted.
        """
        macro.patch     = []             # Standard macro arguments
        macro.str_patch = []             # String conversion expansion
        macro.var_comma_patch = []       # Variadic macro comma patch
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i-1].value == '#':
                    # Work on a copy so the retyped token is private to this macro
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    # Deleting the '#' moves the parameter to index i-1, so i
                    # already points at the next token: continue without i += 1
                    del macro.value[i-1]
                    macro.str_patch.append((argnum,i-1))
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i-1].value == '##'):
                    # Parameter on the right of '##': the operator is removed and
                    # the parameter ends up at index i-1
                    macro.patch.append(('c',argnum,i-1))
                    del macro.value[i-1]
                    continue
                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                    # Parameter on the left of '##': the operator itself is
                    # looked at on the next iteration
                    macro.patch.append(('c',argnum,i))
                    i += 1
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e',argnum,i))
            elif macro.value[i].value == '##':
                # ', ## <vararg>' in a variadic macro: remember the comma's index
                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                        (macro.value[i+1].value == macro.vararg):
                    macro.var_comma_patch.append(i-1)
            i += 1
        # Highest index first, so applying one patch does not shift the others
        macro.patch.sort(key=lambda x: x[2],reverse=True)
429
430    # ----------------------------------------------------------------------
431    # macro_expand_args()
432    #
433    # Given a Macro and list of arguments (each a token list), this method
434    # returns an expanded version of a macro.  The return value is a token sequence
435    # representing the replacement macro tokens
436    # ----------------------------------------------------------------------
437
438    def macro_expand_args(self,macro,args):
439        # Make a copy of the macro token sequence
440        rep = [copy.copy(_x) for _x in macro.value]
441
442        # Make string expansion patches.  These do not alter the length of the replacement sequence
443
444        str_expansion = {}
445        for argnum, i in macro.str_patch:
446            if argnum not in str_expansion:
447                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
448            rep[i] = copy.copy(rep[i])
449            rep[i].value = str_expansion[argnum]
450
451        # Make the variadic macro comma patch.  If the variadic macro argument is empty, we get rid
452        comma_patch = False
453        if macro.variadic and not args[-1]:
454            for i in macro.var_comma_patch:
455                rep[i] = None
456                comma_patch = True
457
458        # Make all other patches.   The order of these matters.  It is assumed that the patch list
459        # has been sorted in reverse order of patch location since replacements will cause the
460        # size of the replacement sequence to expand from the patch point.
461
462        expanded = { }
463        for ptype, argnum, i in macro.patch:
464            # Concatenation.   Argument is left unexpanded
465            if ptype == 'c':
466                rep[i:i+1] = args[argnum]
467            # Normal expansion.  Argument is macro expanded first
468            elif ptype == 'e':
469                if argnum not in expanded:
470                    expanded[argnum] = self.expand_macros(args[argnum])
471                rep[i:i+1] = expanded[argnum]
472
473        # Get rid of removed comma if necessary
474        if comma_patch:
475            rep = [_i for _i in rep if _i]
476
477        return rep
478
479
480    # ----------------------------------------------------------------------
481    # expand_macros()
482    #
483    # Given a list of tokens, this function performs macro expansion.
484    # The expanded argument is a dictionary that contains macros already
485    # expanded.  This is used to prevent infinite recursion.
486    # ----------------------------------------------------------------------
487
488    def expand_macros(self,tokens,expanded=None):
489        if expanded is None:
490            expanded = {}
491        i = 0
492        while i < len(tokens):
493            t = tokens[i]
494            if t.type == self.t_ID:
495                if t.value in self.macros and t.value not in expanded:
496                    # Yes, we found a macro match
497                    expanded[t.value] = True
498
499                    m = self.macros[t.value]
500                    if not m.arglist:
501                        # A simple macro
502                        ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
503                        for e in ex:
504                            e.lineno = t.lineno
505                        tokens[i:i+1] = ex
506                        i += len(ex)
507                    else:
508                        # A macro with arguments
509                        j = i + 1
510                        while j < len(tokens) and tokens[j].type in self.t_WS:
511                            j += 1
512                        if tokens[j].value == '(':
513                            tokcount,args,positions = self.collect_args(tokens[j:])
514                            if not m.variadic and len(args) !=  len(m.arglist):
515                                self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
516                                i = j + tokcount
517                            elif m.variadic and len(args) < len(m.arglist)-1:
518                                if len(m.arglist) > 2:
519                                    self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
520                                else:
521                                    self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
522                                i = j + tokcount
523                            else:
524                                if m.variadic:
525                                    if len(args) == len(m.arglist)-1:
526                                        args.append([])
527                                    else:
528                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
529                                        del args[len(m.arglist):]
530
531                                # Get macro replacement text
532                                rep = self.macro_expand_args(m,args)
533                                rep = self.expand_macros(rep,expanded)
534                                for r in rep:
535                                    r.lineno = t.lineno
536                                tokens[i:j+tokcount] = rep
537                                i += len(rep)
538                    del expanded[t.value]
539                    continue
540                elif t.value == '__LINE__':
541                    t.type = self.t_INTEGER
542                    t.value = self.t_INTEGER_TYPE(t.lineno)
543
544            i += 1
545        return tokens
546
547    # ----------------------------------------------------------------------
548    # evalexpr()
549    #
550    # Evaluate an expression token sequence for the purposes of evaluating
551    # integral expressions.
552    # ----------------------------------------------------------------------
553
554    def evalexpr(self,tokens):
555        # tokens = tokenize(line)
556        # Search for defined macros
557        i = 0
558        while i < len(tokens):
559            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
560                j = i + 1
561                needparen = False
562                result = "0L"
563                while j < len(tokens):
564                    if tokens[j].type in self.t_WS:
565                        j += 1
566                        continue
567                    elif tokens[j].type == self.t_ID:
568                        if tokens[j].value in self.macros:
569                            result = "1L"
570                        else:
571                            result = "0L"
572                        if not needparen: break
573                    elif tokens[j].value == '(':
574                        needparen = True
575                    elif tokens[j].value == ')':
576                        break
577                    else:
578                        self.error(self.source,tokens[i].lineno,"Malformed defined()")
579                    j += 1
580                tokens[i].type = self.t_INTEGER
581                tokens[i].value = self.t_INTEGER_TYPE(result)
582                del tokens[i+1:j+1]
583            i += 1
584        tokens = self.expand_macros(tokens)
585        for i,t in enumerate(tokens):
586            if t.type == self.t_ID:
587                tokens[i] = copy.copy(t)
588                tokens[i].type = self.t_INTEGER
589                tokens[i].value = self.t_INTEGER_TYPE("0L")
590            elif t.type == self.t_INTEGER:
591                tokens[i] = copy.copy(t)
592                # Strip off any trailing suffixes
593                tokens[i].value = str(tokens[i].value)
594                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
595                    tokens[i].value = tokens[i].value[:-1]
596
597        expr = "".join([str(x.value) for x in tokens])
598        expr = expr.replace("&&"," and ")
599        expr = expr.replace("||"," or ")
600        expr = expr.replace("!"," not ")
601        try:
602            result = eval(expr)
603        except Exception:
604            self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
605            result = 0
606        return result
607
608    # ----------------------------------------------------------------------
609    # parsegen()
610    #
    # Parse an input string.
612    # ----------------------------------------------------------------------
    def parsegen(self,input,source=None):
        """Generate the preprocessed token stream for ``input``.

        input  -- text to preprocess
        source -- name used for __FILE__ and in error messages ("" if omitted)

        Trigraphs are replaced, the text is split into logical lines and each
        line is either handled as a directive (first non-whitespace token is
        '#') or collected as ordinary text.  Collected text is macro expanded
        and yielded before a #define, #include or #undef takes effect, and
        once more at the end of the input.
        """

        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)

        self.source = source
        chunk = []          # ordinary tokens collected since the last flush
        enable = True       # False while inside a conditional branch that is skipped
        iftrigger = False   # True once a branch of the current conditional was taken
        ifstack = []        # saved (enable, iftrigger) pairs of enclosing conditionals

        for x in lines:
            # Find the first non-whitespace token.  If the line is all
            # whitespace, i/tok are left at the last token of the line.
            for i,tok in enumerate(x):
                if tok.type not in self.t_WS: break
            if tok.value == '#':
                # Preprocessor directive

                # insert necessary whitespace instead of eaten tokens
                for tok in x:
                    if tok.type in self.t_WS and '\n' in tok.value:
                        chunk.append(tok)

                # Split the directive into its name and its argument tokens
                dirtokens = self.tokenstrip(x[i+1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        # Flush pending text first so it is expanded with the
                        # macro definitions in effect before this directive
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        # The nested parsegen() call redefines __FILE__ and
                        # self.source; restore both afterwards
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    # The state is always pushed so that #endif can pop it,
                    # even when the enclosing region is disabled
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if not args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
                            if enable:         # If already true, we flip enable False
                                enable = False
                            elif not iftrigger:   # If False, but not triggered yet, we'll check expression
                                result = self.evalexpr(args)
                                if result:
                                    enable  = True
                                    iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")

                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                # The branch before #else was active: skip this one
                                enable = False
                            elif not iftrigger:
                                # No branch has been taken so far: take this one
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #else")

                elif name == 'endif':
                    if ifstack:
                        # Restore the state of the enclosing conditional
                        enable,iftrigger = ifstack.pop()
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass

            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        # Flush whatever text is left at the end of the input
        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []
737
738    # ----------------------------------------------------------------------
739    # include()
740    #
741    # Implementation of file-inclusion
742    # ----------------------------------------------------------------------
743
744    def include(self,tokens):
745        # Try to extract the filename and then process an include file
746        if not tokens:
747            return
748        if tokens:
749            if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
750                tokens = self.expand_macros(tokens)
751
752            if tokens[0].value == '<':
753                # Include <...>
754                i = 1
755                while i < len(tokens):
756                    if tokens[i].value == '>':
757                        break
758                    i += 1
759                else:
760                    print("Malformed #include <...>")
761                    return
762                filename = "".join([x.value for x in tokens[1:i]])
763                path = self.path + [""] + self.temp_path
764            elif tokens[0].type == self.t_STRING:
765                filename = tokens[0].value[1:-1]
766                path = self.temp_path + [""] + self.path
767            else:
768                print("Malformed #include statement")
769                return
770        for p in path:
771            iname = os.path.join(p,filename)
772            try:
773                data = open(iname,"r").read()
774                dname = os.path.dirname(iname)
775                if dname:
776                    self.temp_path.insert(0,dname)
777                for tok in self.parsegen(data,filename):
778                    yield tok
779                if dname:
780                    del self.temp_path[0]
781                break
782            except IOError:
783                pass
784        else:
785            print("Couldn't find '%s'" % filename)
786
787    # ----------------------------------------------------------------------
788    # define()
789    #
790    # Define a new macro
791    # ----------------------------------------------------------------------
792
793    def define(self,tokens):
794        if isinstance(tokens,STRING_TYPES):
795            tokens = self.tokenize(tokens)
796
797        linetok = tokens
798        try:
799            name = linetok[0]
800            if len(linetok) > 1:
801                mtype = linetok[1]
802            else:
803                mtype = None
804            if not mtype:
805                m = Macro(name.value,[])
806                self.macros[name.value] = m
807            elif mtype.type in self.t_WS:
808                # A normal macro
809                m = Macro(name.value,self.tokenstrip(linetok[2:]))
810                self.macros[name.value] = m
811            elif mtype.value == '(':
812                # A macro with arguments
813                tokcount, args, positions = self.collect_args(linetok[1:])
814                variadic = False
815                for a in args:
816                    if variadic:
817                        print("No more arguments may follow a variadic argument")
818                        break
819                    astr = "".join([str(_i.value) for _i in a])
820                    if astr == "...":
821                        variadic = True
822                        a[0].type = self.t_ID
823                        a[0].value = '__VA_ARGS__'
824                        variadic = True
825                        del a[1:]
826                        continue
827                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
828                        variadic = True
829                        del a[1:]
830                        # If, for some reason, "." is part of the identifier, strip off the name for the purposes
831                        # of macro expansion
832                        if a[0].value[-3:] == '...':
833                            a[0].value = a[0].value[:-3]
834                        continue
835                    if len(a) > 1 or a[0].type != self.t_ID:
836                        print("Invalid macro argument")
837                        break
838                else:
839                    mvalue = self.tokenstrip(linetok[1+tokcount:])
840                    i = 0
841                    while i < len(mvalue):
842                        if i+1 < len(mvalue):
843                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
844                                del mvalue[i]
845                                continue
846                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
847                                del mvalue[i+1]
848                        i += 1
849                    m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
850                    self.macro_prescan(m)
851                    self.macros[name.value] = m
852            else:
853                print("Bad macro definition")
854        except LookupError:
855            print("Bad macro definition")
856
857    # ----------------------------------------------------------------------
858    # undef()
859    #
860    # Undefine a macro
861    # ----------------------------------------------------------------------
862
863    def undef(self,tokens):
864        id = tokens[0].value
865        try:
866            del self.macros[id]
867        except LookupError:
868            pass
869
870    # ----------------------------------------------------------------------
871    # parse()
872    #
873    # Parse input text.
874    # ----------------------------------------------------------------------
875    def parse(self,input,source=None,ignore={}):
876        self.ignore = ignore
877        self.parser = self.parsegen(input,source)
878
879    # ----------------------------------------------------------------------
880    # token()
881    #
882    # Method to return individual tokens
883    # ----------------------------------------------------------------------
884    def token(self):
885        try:
886            while True:
887                tok = next(self.parser)
888                if tok.type not in self.ignore: return tok
889        except StopIteration:
890            self.parser = None
891            return None
892
if __name__ == '__main__':
    # Command line use: preprocess the file named by the first argument with a
    # lexer built from the rules in this module, and print every token.
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    # Read the whole input up front and close the file right away
    with open(sys.argv[1]) as f:
        input = f.read()

    p = Preprocessor(lexer)
    p.parse(input,sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print(p.source, tok)
908
909
910
911
912
913
914
915
916
917
918
919