#!/usr/bin/env python

"""
Parse C declarations into a tree of Node objects.

(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
Released under GNU LGPL license.

"""

import sys

from lexer import Lexer
from parse_core import Symbols, Parser
import node as node_module


class Node(node_module.Node):

  def is_typedef(self):
    for x in self:
      if isinstance(x,Node):
        if x.is_typedef():
          return 1
    return 0

  #def explain(self):
    #l = []
    #for x in self:
      #if isinstance(x,Node):
        #l.append(x.explain())
      #else:
        #l.append(str(x))
    #return string.join(l," ")
      ##(self.__class__.__name__,string.join(l) )

  def psource(self):
    if hasattr(self,'lines'):
      print "# "+"\n# ".join(self.lines)+"\n"


###################################################################
#
###################################################################
#


class BasicType(Node):
  " int char short etc. "
  def __init__(self,name):
    Node.__init__(self,name)

class Qualifier(Node):
  """
  const, volatile, etc.
  """
  def __init__(self,name):
    Node.__init__(self,name)
    self.name=name

class StorageClass(Node):
  """
  static, extern, register, etc.
  """
  def __init__(self,name):
    Node.__init__(self,name)
    self.name=name

class Typedef(StorageClass):
  """
  the 'typedef' storage class
  """
  def __init__(self,s='typedef'):
    Node.__init__(self,s)
  #def explain(self):
    #return "type"

class Ellipses(Node):
  """
  the '...' (varargs) marker in a parameter list
  """
  def __init__(self,s='...'):
    Node.__init__(self,s)

class GCCBuiltin(BasicType):
  """
  a gcc builtin type
  """
  pass


class Identifier(Node):
  """
  a declared name; may also hold an Initializer
  """
  def __init__(self,name="",*items):
    Node.__init__(self,name,*items)
    self.name=name

class Function(Node,Parser):
  """
  a function signature: a parenthesised list of ParameterDeclarations
  """
  def __init__(self,*items):
    Node.__init__(self,*items)

  def parse(self,lexer,symbols):
    symbols = Symbols(symbols)
    #lexer.get_token()
    if lexer.tok != ')':
      if not lexer.tok:
        self.parse_error(lexer)
      #lexer.unget_token() # unget start of decl
      while lexer.tok != ')':
        node = ParameterDeclaration()
        node.parse(lexer,symbols)
        self.append( node )
        if lexer.tok != ')' and lexer.tok != ',':
          self.parse_error(lexer)
        if lexer.tok == ',':
          lexer.get_token()
    lexer.get_token()


class Pointer(Node):
  """
  a '*' in a declarator
  """
  def __init__(self,*items):
    Node.__init__(self,*items)

class Array(Node,Parser):
  """
  a '[...]' in a declarator; holds the size expression as a string (or None)
  """
  def __init__(self,*items):
    Node.__init__(self,*items)

  def parse(self,lexer,symbols):
    lexer.get_token() # a number or ']'
    # XX
    # HACK HACK: constant c expressions can appear in here:
    # eg. [ 15 * sizeof (int) - 2 * sizeof (void *) ]
    # XX
    toks = []
    while lexer.tok != ']':
      #self.append( lexer.kind )
      toks.append( lexer.tok )
      lexer.get_token()
    child = " ".join(toks)
    if child == "":
      child = None
    self.append( child )
    lexer.get_token() # read past the ']'
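
# For example (a sketch of the behaviour above): parsing the "[8]" part of
# "int buf[8];" leaves an Array node whose single child is the string "8",
# while "int buf[];" gives an Array node whose child is None.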

class Tag(Node):
  """
  the tag (name) of a struct, union or enum
  """
  pass


class Compound(Node,Parser):
  "Struct or Union"

  def __init__(self,*items,**kw):
    Node.__init__(self,*items,**kw)

  def parse(self,lexer,symbols):
    symbols = Symbols(symbols)
    tag = "" # anonymous
    if lexer.tok != '{':
      tag = lexer.tok
      if not ( tag[0]=='_' or tag[0].isalpha() ):
        self.parse_error(lexer ,"expected tag, got '%s'"%tag )
      lexer.get_token()
    if tag:
      self.append(Tag(tag))
    else:
      self.append(Tag())
    self.tag = tag
    if lexer.tok == '{':
      fieldlist = []
      lexer.get_token()
      if lexer.tok != '}':
        if not lexer.tok: self.parse_error(lexer)
        while lexer.tok != '}':
          node = StructDeclaration()
          node.parse(lexer,symbols)
          fieldlist.append( node )
      self += fieldlist
      lexer.get_token()
    if self.verbose:
      print "%s.parse() #<--"%(self)

class Struct(Compound):
  """
  a struct specifier
  """
  pass

class Union(Compound):
  """
  a union specifier
  """
  pass

class Enum(Node,Parser):
  """
  an enum specifier: a Tag followed by enumerator Identifiers
  """
  def __init__(self,*items,**kw):
    Node.__init__(self,*items,**kw)

  def parse(self,lexer,symbols):
    tag = "" # anonymous
    if lexer.tok != '{':
      tag = lexer.tok
      if not ( tag[0]=='_' or tag[0].isalpha() ):
        self.parse_error(lexer ,"expected tag, got '%s'"%tag )
      lexer.get_token()
    if tag:
      self.append(Tag(tag))
    else:
      self.append(Tag())
    self.tag = tag
    if lexer.tok == '{':
      lexer.get_token()
      if lexer.tok != '}': # XX dopey control flow
        if not lexer.tok: # XX dopey control flow
          self.parse_error(lexer) # XX dopey control flow
        while lexer.tok != '}': # XX dopey control flow
          if lexer.kind is not None:
            self.expected_error(lexer ,"identifier" )
          ident = Identifier(lexer.tok)
          if symbols[ident[0]] is not None:
            self.parse_error(lexer,"%s already defined."%ident[0])
          symbols[ident[0]]=ident
          self.append( ident )
          lexer.get_token()
          if lexer.tok == '=':
            lexer.get_token()
            # ConstantExpr
            # XX hack hack XX
            while lexer.tok!=',' and lexer.tok!='}':
              lexer.get_token()
#            if type( lexer.kind ) is not int:
#              #self.parse_error(lexer ,"expected integer" )
#              # XX hack hack XX
#              while lexer.tok!=',' and lexer.tok!='}':
#                lexer.get_token()
#            else:
#              # put initializer into the Identifier
#              ident.append( lexer.kind )
#              lexer.get_token()
          if lexer.tok != '}':
            if lexer.tok != ',':
              self.expected_error(lexer,"}",",")
            lexer.get_token() # ','
      lexer.get_token()
    if self.verbose:
      print "%s.parse() #<--"%(self)



class Declarator(Node,Parser):
  """
  a declarator: an Identifier decorated with Pointer, Array and Function nodes
  """
  def __init__(self,*items):
    Node.__init__(self,*items)
    self.ident = None

  def parse(self,lexer,symbols):
    #Parser.parse_enter(self,lexer)
    stack = []
    # read up to identifier, pushing tokens onto stack
    self.ident = self.parse_identifier(lexer,symbols,stack)
    self.name = ''
    if self.ident is not None:
      self.append( self.ident )
      self.name = self.ident.name
    # now read outwards from identifier
    self.parse_declarator(lexer,symbols,stack)
    #Parser.parse_leave(self,lexer)

  def parse_identifier(self,lexer,symbols,stack):
    if self.verbose:
      print "%s.parse_identifier()"%self
    ident = None
    if lexer.tok != ';':
      while lexer.tok and lexer.kind is not None:
        stack.append( (lexer.tok, lexer.kind) )
        lexer.get_token()
      if lexer.tok:
        ident = Identifier( lexer.tok )
        #stack.append( (ident.name, ident) )
        lexer.get_token()
    if self.verbose:
      print "%s.parse_identifier()=%s"%(self,repr(ident))
    return ident

  def parse_declarator(self,lexer,symbols,stack,level=0):
    if self.verbose:
      print "  "*level+"%s.parse_declarator(%s) # --->"%\
        (self,stack)
    if lexer.tok == '[':
      while lexer.tok == '[':
        node = Array()
        node.parse(lexer,symbols)
        self.append(node)
      if lexer.tok == '(':
        self.parse_error(lexer ,"array of functions" )
    elif lexer.tok == '(':
      lexer.get_token()
      node = Function()
      node.parse(lexer,symbols)
      self.append( node )
      if lexer.tok == '(':
        self.parse_error(lexer ,"function returns a function" )
      if lexer.tok == '[':
        self.parse_error(lexer ,"function returns an array" )
    while stack:
      tok, kind = stack[-1] # peek
      if tok == '(':
        stack.pop()
        self.consume(lexer,')')
        self.parse_declarator(lexer,symbols,stack,level+1)
      elif tok == '*':
        stack.pop()
        self.append( Pointer() )
      else:
        tok, kind = stack.pop()
        self.append( kind )
    if self.verbose:
      print "  "*level+"%s.parse_declarator(%s) # <---"%\
        (self,stack)
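
  # A rough sketch of how the two passes above decompose a declarator
  # (assuming '*' and '(' tokens carry a non-None lexer.kind, as the code
  # above expects): for "char *(*fp)()" the tokens "* ( *" are stacked by
  # parse_identifier, fp becomes the Identifier, and parse_declarator then
  # unwinds the stack to append Pointer, Function and Pointer nodes, i.e.
  # fp is a pointer to a function returning a pointer (to char).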


class AbstractDeclarator(Declarator):
  """ used in ParameterDeclaration; may lack an identifier """

  def parse_identifier(self,lexer,symbols,stack):
    if self.verbose:
      print "%s.parse_identifier()"%self
    ident = Identifier()
    while 1:
      if lexer.tok == ';':
        self.parse_error(lexer)
      if lexer.tok == ')':
        break
      if lexer.tok == ',':
        break
      if lexer.tok == '[':
        break
      if lexer.kind is None:
        #print "%s.new identifier"%self
        ident = Identifier( lexer.tok )
        lexer.get_token()
        #stack.append( (ident.name, ident) )
        break
      stack.append( (lexer.tok, lexer.kind) )
      lexer.get_token()
    if self.verbose:
      print "%s.parse_identifier()=%s"%(self,repr(ident))
    return ident

class FieldLength(Node):
  """
  the bit-field width of a struct member
  """
  pass

class StructDeclarator(Declarator):
  """
  a Declarator that may also carry a bit-field FieldLength
  """
  def parse(self,lexer,symbols):
    if lexer.tok != ':':
      Declarator.parse(self,lexer,symbols)
    if lexer.tok == ':':
      lexer.get_token()
      # ConstantExpr
      length = int(lexer.tok)
      #print "length = ",length
      self.append( FieldLength(length) )
      lexer.get_token()

class DeclarationSpecifiers(Node,Parser):
  """
  the specifiers of a declaration: type, qualifiers and storage class
  """
  def __init__(self,*items):
    Node.__init__(self,*items)

  def __eq__(self,other):
    " unordered (set/bag) equality "
    if not isinstance(other,Node):
      return 0
    for i in range(len(self)):
      if not self[i] in other:
        return 0
    for i in range(len(other)):
      if not other[i] in self:
        return 0
    return 1

  def parse(self,lexer,symbols):
    self.parse_spec(lexer,symbols)
    self.reverse()

  def parse_spec(self,lexer,symbols):
    typespec = None
    while lexer.tok:
      if isinstance( lexer.kind, TypeAlias ) or\
        isinstance( lexer.kind, BasicType ):
        if typespec is not None:
          self.parse_error(lexer ,"type already specified as %s"\
            %typespec )
        typespec=lexer.kind
        self.append( lexer.kind )
        lexer.get_token()
      elif isinstance( lexer.kind, Qualifier ):
        self.append( lexer.kind )
        lexer.get_token()
      elif isinstance( lexer.kind, StorageClass ):
        self.append( lexer.kind )
        lexer.get_token()
      elif lexer.tok=='struct':
        lexer.get_token()
        self.parse_struct(lexer,symbols)
        break #?
      elif lexer.tok=='union':
        lexer.get_token()
        self.parse_union(lexer,symbols)
        break #?
      elif lexer.tok=='enum':
        lexer.get_token()
        self.parse_enum(lexer,symbols)
        break #?
      elif lexer.kind is None:
        # identifier
        break
      else:
        break

  def parse_struct(self,lexer,symbols):
    if self.verbose:
      print "%s.parse_struct()"%(self)
    node = Struct()
    node.parse(lexer,symbols)
    _node = None
    if node.tag:
      _node = symbols.get_tag( node.tag )
    if _node is not None:
      if not isinstance( _node, Struct ):
        self.parse_error(lexer,"tag defined as wrong kind")
      if len(node)>1:
        if len(_node)>1:
          self.parse_error(lexer,"tag already defined as %s"%_node)
        #symbols.set_tag( node.tag, node )
      #else:
        # refer to the previously defined struct
        ##node = _node
        #node = _node.clone()
    if 0:
      # refer to the previously defined struct
      if len(node)==1:
        _node = symbols.deep_get_tag( node.tag )
        if _node is not None:
          node=_node
      # But what about any future reference to the struct ?
    if node.tag:
      symbols.set_tag( node.tag, node )
    self.append( node )

  def parse_union(self,lexer,symbols):
    if self.verbose:
      print "%s.parse_union()"%(self)
    node = Union()
    node.parse(lexer,symbols)
    _node = None
    if node.tag:
      _node = symbols.get_tag( node.tag )
    if _node is not None:
      if not isinstance( _node, Union ):
        self.parse_error(lexer,"tag %s defined as wrong kind"%repr(node.tag))
      if len(node)>1:
        if len(_node)>1:
          self.parse_error(lexer,"tag already defined as %s"%_node)
        #symbols.set_tag( node.tag, node )
      #else:
        #node = _node
    #if len(node)==1:
      #_node = symbols.deep_get_tag( node.tag )
      #if _node is not None:
        #node=_node
    if node.tag:
      symbols.set_tag( node.tag, node )
    self.append( node )

  def parse_enum(self,lexer,symbols):
    if self.verbose:
      print "%s.parse_enum()"%(self)
    node = Enum()
    node.parse(lexer,symbols)
    _node = None
    if node.tag:
      _node = symbols.get_tag( node.tag )
    if _node is not None:
      if not isinstance( _node, Enum ):
        self.parse_error(lexer,"tag defined as wrong kind")
      if len(node)>1:
        if len(_node)>1:
          self.parse_error(lexer,"tag already defined as %s"%_node)
        #symbols.set_tag( node.tag, node )
      #else:
        #node = _node
    #if len(node)==1:
      #_node = symbols.deep_get_tag( node.tag )
      #if _node is not None:
        #node=_node
    if node.tag:
      symbols.set_tag( node.tag, node )
    self.append( node )

  def is_typedef(self):
    return self.find(Typedef) is not None

  def needs_declarator(self):
    for node in self:
      if isinstance( node, Struct ):
        return False
      if isinstance( node, Enum ):
        return False
      if isinstance( node, Union ):
        return False
    return True
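
  # Rationale (a brief note): a bare struct/union/enum specifier is already a
  # complete declaration by itself, e.g. "struct point { int x, y; };",
  # so no declarator is required after it; any other specifier list needs one.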



class TypeSpecifiers(DeclarationSpecifiers):
  " used in ParameterDeclaration "

  def parse_spec(self,lexer,symbols):
    typespec = None
    while lexer.tok:
      if isinstance( lexer.kind, TypeAlias ) or\
        isinstance( lexer.kind, BasicType ):
        if typespec is not None:
          self.parse_error(lexer ,"type already specified as %s"\
            %typespec )
        typespec=lexer.kind
        self.append( lexer.kind )
        lexer.get_token()
      elif isinstance( lexer.kind, Qualifier ):
        self.append( lexer.kind )
        lexer.get_token()
      elif isinstance( lexer.kind, StorageClass ):
        self.parse_error(lexer ,"'%s' cannot appear here"%lexer.tok )
      elif lexer.tok=='struct':
        lexer.get_token()
        self.parse_struct(lexer,symbols)
        break #?
      elif lexer.tok=='union':
        lexer.get_token()
        self.parse_union(lexer,symbols)
        break #?
      elif lexer.tok=='enum':
        lexer.get_token()
        self.parse_enum(lexer,symbols)
        break #?
      elif lexer.kind is None:
        # identifier
        break
      else:
        break


class Initializer(Node,Parser):
  """
  an initializer; parsing these is not implemented
  """
  def __init__(self,*items):
    Node.__init__(self,*items)

  def parse(self,lexer,symbols):
    self.parse_error(lexer,"not implemented")


class TypeAlias(Node):
  " typedefed things "

  def __init__(self,name,decl=None):
    Node.__init__(self,name)#,decl)
    self.name=name
    self.decl=decl


class Declaration(Node,Parser):
  """
  a complete declaration: DeclarationSpecifiers followed by Declarators
  """
  def __init__(self,*items):
    Node.__init__(self,*items)
    #self.acted=False

  def parse(self,lexer,symbols):
    if not lexer.tok:
      return
    Parser.parse_enter(self,lexer)
    declspec = DeclarationSpecifiers()
    declspec.parse(lexer,symbols)
    if len(declspec)==0:
      if lexer.tok == ';':
        lexer.get_token()
        # empty declaration...
        return
      self.parse_error(lexer,
        "expected specifiers, got '%s'"%lexer.tok )
    self.append(declspec)
    while 1:
      decl = Declarator()
      decl.parse(lexer,symbols)
      if len(decl)==0:
        if declspec.needs_declarator():
          self.parse_error(lexer,
            "expected declarator, got '%s'"%lexer.tok )
      self.append(decl)
      ident = decl.ident
      if ident is not None:
      #if len(ident):
        # install symbol
        node = symbols[ident[0]]
        if node is not None:
          # we allow functions to be defined (as same) again
          #print node.deepstr(),'\n', self.deepstr()
          _node = node.clone()
          _node.delete(Identifier)
          _self = self.clone()
          _self.delete(Identifier)
          if _node != _self:
            self.parse_error(lexer,
              "\n%s\n  already defined as \n%s\n"%\
              (self.deepstr(),node.deepstr()))
        else:
          if self.is_typedef():
            #lexer.mktypedef( ident[0], self )
            tp = TypeAlias(ident[0],decl)
            lexer.mktypedef( ident[0], tp )
          else:
            symbols[ident[0]] = self
        if lexer.tok == '=':
          # parse initializer
          lexer.get_token()
          init = Initializer()
          init.parse(lexer,symbols)
          ident.append( init ) # as in Enum
      #else: struct, union or enum
      if lexer.tok == ';':
        # no more declarators
        break
      if lexer.tok == '{':
        # ! ahhh, function body !!!
#        sys.stderr.write(
#          "WARNING: function body found at line %s\n"%lexer.lno )
        bcount = 1
        while bcount:
          lexer.get_brace_token()
          if lexer.tok == '}':
            bcount -= 1
          if lexer.tok == '{':
            bcount += 1
        lexer.get_token()
        Parser.parse_leave(self,lexer)
        return
      self.consume(lexer,',')
    self.consume(lexer,';')
    Parser.parse_leave(self,lexer)

  def is_typedef(self):
    spec=self[0]
    assert isinstance(spec,DeclarationSpecifiers), self.deepstr()
    return spec.is_typedef()


class ParameterDeclaration(Declaration):
  """
  a Declaration as it appears in a function parameter list
  """
  def parse(self,lexer,symbols):
    typespec = TypeSpecifiers()
    typespec.parse(lexer,symbols)
    self.append(typespec)
    decl = AbstractDeclarator()
    decl.parse(lexer,symbols)
    self.append(decl)
    ident = decl.ident
    if ident is not None and ident[0]:
      node = symbols[ident[0]]
      if node is not None:
        self.parse_error(lexer,
          "%s already defined as %s"%(ident,node))
      else:
        symbols[ident[0]] = self


class StructDeclaration(Declaration):
  """
  a Declaration as it appears inside a struct or union body
  """
  def parse(self,lexer,symbols):
    if not lexer.tok:
      return
    declspec = DeclarationSpecifiers()
    declspec.parse(lexer,symbols)
    self.append(declspec)
    if len(declspec)==0:
      if lexer.tok == ';':
        lexer.get_token()
        # empty declaration...
        return
      self.parse_error(lexer,
        "expected specifiers, got '%s'"%lexer.tok )
    while 1:
      decl = StructDeclarator()
      decl.parse(lexer,symbols)
      if len(decl)==0:
        self.parse_error(lexer,
          "expected declarator, got '%s'"%lexer.tok )
      self.append(decl)
      ident = decl.ident
      if ident is not None:
        node = symbols[ident[0]]
        if node is not None:
          self.parse_error(lexer ,
            "%s already defined as %s"%(ident,node))
        else:
          if declspec.is_typedef():
            self.parse_error(lexer,"typedef in struct or union")
          else:
            symbols[ident[0]] = self
      if lexer.tok == ';':
        break
      self.consume(lexer,',')
    self.consume(lexer,';')


class TransUnit(Node,Parser):
  """
  a translation unit: the list of Declarations parsed from the source
  """
  def __init__(self,*items,**kw):
    Node.__init__(self,*items,**kw)

  def parse(self,s,verbose=0):
    self.symbols = Symbols()
    self.lexer = Lexer(s,verbose=verbose) #,host=__module__)
    node = None
    while self.lexer.tok:
      node=Declaration()
      node.parse(self.lexer,self.symbols)
      #sys.stderr.write( "# line %s\n"%self.lexer.lno )
      if node:
        self.append(node)
        #node.psource()
        #print node.deepstr(),'\n'
        #node.act()

  def strip(self,files):
    " leave only the declarations from <files> "
    i=0
    while i<len(self):
      if self[i].file in files:
        i=i+1
      else:
        self.pop(i)

  def strip_filter(self,cb):
    " leave only the declarations such that cb(file) "
    i=0
    while i<len(self):
      if cb(self[i].file):
        i=i+1
      else:
        self.pop(i)

  def assert_no_dups(self):
    check={}
    for node in self.nodes():
      assert not check.has_key(id(node))
      check[id(node)]=1



try:
  import NoModule # no such module: forces the ImportError path, so psyco stays disabled
  import psyco
  from psyco.classes import *
except ImportError:
  class _psyco:
    def jit(self):      pass
    def bind(self, f):  pass
    def proxy(self, f): return f
  psyco = _psyco()
psyco.bind( Lexer.get_token )
psyco.bind( Node )

def run0():
  verbose = 0
  if not sys.argv[1:]:
    s = sys.stdin.read()
  if sys.argv[1:]:
    s = sys.argv[1]
    #if sys.argv[2:]:
      #verbose = int(sys.argv[2])
  if 0:
    import profile
    profile.run('TransUnit(s)','prof.out')
    import pstats
    p=pstats.Stats('prof.out')
    p.strip_dirs().sort_stats(-1).print_stats()
  else:
    node = TransUnit(verbose = 1 )
    node.parse(s)
    node.act(1,1,1)

def run1():
  cstr = "char *(*)() ,"
  node = AbstractDeclarator()
  node.parse( Lexer(cstr,True), Symbols() )
  print node.deepstr()
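
def run2():
  # A minimal usage sketch (an illustrative addition, relying only on the
  # TransUnit/Node API used elsewhere in this file): parse a few declarations
  # from a string and dump the resulting trees.
  cstr = "typedef unsigned int uint; uint counter; struct point { int x, y; };"
  unit = TransUnit(verbose=0)
  unit.parse(cstr)
  for decl in unit:
    print decl.deepstr()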

if __name__=="__main__":
  pass

