1#!/usr/bin/env python
2""" cdecl.py - parse c declarations
3
4(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
5Released under GNU LGPL license.
6
7version 0.xx
8
9"""
10
11import sys
12import string
13import types
14import copy
15
16#from cparse import BasicType, Qualifier, StorageClass, Typedef, Ellipses, GCCBuiltin
17#from cparse import *
18
19import cparse as host
20
class LexError(Exception):
  """Raised by the Lexer when the input text cannot be tokenized,
  e.g. for an unterminated character constant."""
23
class Lexer(object):
  """Line oriented tokenizer for C declarations.

  The text handed to lex() is split into lines.  Lines whose first
  character is '#' are never tokenized; when they carry a double quoted
  name, that name is remembered and reported by get_file().

  The current token is exposed through four attributes:
    tok  - the token text ("" once the input is exhausted)
    kind - what the token is: a node from self.lookup for known
           keywords/typedefs, None for unknown identifiers, an int or
           float for number literals, or the token text itself for
           punctuation and character constants
    lno  - index into self.lines of the current line
    col  - column just past the current token
  """

  def __init__(self,s="",verbose=0,**kw):
    """Build the keyword table and, when s is non-empty, start lexing it.

    s       - text to tokenize (optional; lex() may be called later)
    verbose - when true, state changes and typedef registrations are printed
    kw      - extra attributes, stored on the instance as given
    """
    self.verbose = verbose
    self.lookup = {} # a map for keywords and typedefs
    for t in "float double void char int".split():
      self.lookup[t] = host.BasicType( t )
    # NOTE: the original author was unsure whether 'inline' belongs here
    for t in "register signed unsigned short long const volatile inline".split():
      self.lookup[t] = host.Qualifier( t )
    for t in "extern static auto".split():
      self.lookup[t] = host.StorageClass( t )
    self.lookup['typedef'] = host.Typedef()
    self.lookup['...'] = host.Ellipses()
    # Give the cursor a defined (empty) state so that a Lexer built
    # without text can be queried before lex() is called.
    self.stack = None
    self.lines = []
    self.so_file = ""
    self.tok, self.kind, self.lno, self.col = "", "", 0, 0
    if s:
      self.lex(s)
    # applied last, so keyword arguments win over anything set above
    self.__dict__.update(kw)

  def lex(self,s):
    "Reset the lexer onto the text s and read the first token."
    self.stack = None
    self.lines = s.splitlines()
    self.set_state("","",0,0)
    self.so_file = ""
    self._newline()
    self.get_token() # start

  def mktypedef(self,tok,node):
    "Register node as the kind returned for identifier tok."
    if self.verbose:
      # single parenthesized argument: same output from the python 2
      # print statement and the python 3 print function
      print("%s.mktypedef(%s,%s)"%(self,tok,node))
    self.lookup[ tok ] = node

  def rmtypedef(self,tok):
    """Forget the entry for tok (used in round trip testing).

    Raises KeyError when tok is not registered.
    """
    assert isinstance( self.lookup[ tok ], host.Node ) # existence
    del self.lookup[ tok ]

  def _get_kind(self,tok):
    """Return the lookup entry for tok, or None when there is none.

    Tokens starting with "__builtin" get a host.GCCBuiltin node created
    and cached on first sight.
    """
    try:
      return self.lookup[tok]
    except KeyError:
      if tok.startswith("__builtin"):
        node = host.GCCBuiltin(tok)
        self.lookup[tok] = node
        return node
      return None

  def _newline(self):
    """Skip over lines starting with '#', beginning at self.lno.

    The text between the first pair of double quotes on such a line is
    recorded as the current source file.  Lines without a quoted name
    (e.g. '#pragma pack(1)') are skipped and leave the file name alone.
    """
    while self.lno < len(self.lines):
      line = self.lines[self.lno]
      if not line or line[0] != "#":
        break
      parts = line.split('"')
      if len(parts) >= 2:
        self.so_file = parts[1]
      self.lno += 1

  def get_brace_token( self ):
    """Advance to the next '{' or '}', ignoring everything in between.

    The previous state is saved for unget_token().  When no brace is
    left, tok and kind become "".
    """
    self.push_state()
    tok, kind = "", ""
    while self.lno < len(self.lines):
      s = self.lines[self.lno]
      i = self.col
      while i < len(s) and s[i] not in '{}':
        i += 1
      if i < len(s):
        tok = kind = s[i]
        self.col = i+1
        break
      # nothing found on this line, go to the next one
      self.col = 0
      self.lno += 1
      self._newline()
    self.set_state(tok,kind,self.lno,self.col)

  def _lex_number(self,s,i):
    """Scan the number literal starting at s[i].

    The caller guarantees that s[i] is a digit, or one of '+-.' directly
    followed by a digit.  Returns (tok, kind, end): kind is the int or
    float value and end the column just past the literal.  For hex
    literals tok holds the (optionally signed) digits without the
    0x/0X prefix.

    Raises LexError for a hex prefix without digits and for a '.'
    inside a hex literal.
    """
    hexdigits = "0123456789abcdefABCDEF"
    sign = ""
    if s[i] in '+-':
      # peel the sign off first so that "-0x10" is seen as hex
      sign = s[i]
      i += 1
    is_float = s[i]=='.'
    is_hex = s[i:i+2] in ('0x','0X')
    if is_hex:
      i += 2
      if i >= len(s) or s[i] not in hexdigits:
        raise LexError( self.err_string() + "malformed hex constant" )
    j = i+1
    while j < len(s):
      c = s[j]
      if c.isdigit() or (is_hex and c in hexdigits):
        j += 1
      elif c=='.' and not is_float:
        if is_hex:
          raise LexError( self.err_string() + "malformed hex constant" )
        j += 1
        is_float = 1
      else:
        break
    tok = sign + s[i:j]
    if is_float:
      kind = float(tok)
    elif is_hex:
      kind = int(tok,16)
    else:
      kind = int(tok)
    return tok, kind, j

  def _lex_char(self,s,i):
    """Scan the character constant whose opening quote is s[i].

    Returns (tok, end) where tok includes both quotes.  A backslash
    escapes the character after it, so '\\'' is a single constant.
    Raises LexError when the closing quote is missing on this line.
    """
    j = i+1
    while j < len(s):
      if s[j]=='\\':
        j += 2 # skip the escaped character as well
        continue
      # the character right after the opening quote never closes it
      if s[j]=="'" and j > i+1:
        break
      j += 1
    if j >= len(s):
      raise LexError( self.err_string() + "unterminated char constant" )
    return s[i:j+1], j+1

  def get_token(self):
    """Advance to the next token, saving the old state for unget_token().

    Recognized: identifiers, number literals, '...', single character
    punctuation and character constants.  Any other character is
    reported on stderr and skipped.  At the end of the input tok and
    kind become "".

    Raises LexError on malformed hex or character constants.
    """
    self.push_state()
    tok, kind = "", ""
    while self.lno < len(self.lines):
      s = self.lines[self.lno]
      i = self.col
      while i < len(s):
        c = s[i]
        if c.isspace():
          i += 1
          continue
        if c.isalpha() or c=='_':
          # identifier; continuation characters are classified with the
          # same str methods as the first one
          j = i+1
          while j < len(s) and (s[j].isalnum() or s[j]=='_'):
            j += 1
          tok = s[i:j]
          self.col = j
          kind = self._get_kind(tok)
          break
        if c.isdigit() or \
            (i+1<len(s) and c in '+-.' and s[i+1].isdigit()):
          # number literal
          tok, kind, self.col = self._lex_number(s,i)
          break
        if s[i:i+3]=='...':
          # ellipses (must be tried before the single '.' below)
          tok = s[i:i+3]
          kind = self._get_kind(tok)
          self.col = i+3
          break
        if c in '*/{}()[]:;,=+-~.<>|&':
          tok = c
          kind = tok
          self.col = i+1
          break
        if c == "'":
          # character constant
          tok, self.col = self._lex_char(s,i)
          kind = tok
          break
        # keep moving
        sys.stderr.write( "lexer ignoring '%s' lno=%d\n"%(c,self.lno+1) )
        i += 1
      if tok:
        # we got one
        break
      # nothing found, go to next line
      self.col = 0
      self.lno += 1
      self._newline()
    self.set_state(tok,kind,self.lno,self.col)

  def err_string(self):
    """Return helpful error string :)

    It is the current line followed by a caret under column self.col.
    At the end of the input the line part is empty.
    """
    if self.lno < len(self.lines):
      line = self.lines[self.lno]
    else:
      line = ""
    return line+"\n"+" "*self.col+"^\n"

  def push_state(self):
    "Remember the current state; only one state is kept (a short stack :)"
    self.stack = self.get_state()

  def unget_token(self):
    """Go back to the state saved by the last get_token/get_brace_token.

    Can be used once per saved state; the saved state is cleared.
    """
    assert self.stack is not None
    self.set_state(*self.stack)
    self.stack = None

  def set_state(self,tok,kind,lno,col):
    "Set the current token, its kind and the cursor position."
    if self.verbose:
      # same text as the python 2 statement `print "... = ",(tuple)`
      print("tok,kind,lno,col =  %s"%((tok,kind,lno,col),))
    self.tok = tok
    self.kind = kind
    self.lno = lno # line
    self.col = col # column

  def get_state(self):
    "Return the tuple (tok, kind, lno, col)."
    return self.tok,self.kind,self.lno,self.col

  def get_file(self):
    "Return the file name taken from the most recent '#' line, or ''."
    return self.so_file
242
243###################################################################
244#
245###################################################################
246#
247
248
249