#!/usr/bin/env python
""" cdecl.py - parse c declarations

(c) 2002, 2003, 2004, 2005 Simon Burton <simon@arrowtheory.com>
Released under GNU LGPL license.

version 0.xx

"""

import sys
import string
import types
import copy

import cparse as host


class LexError(Exception):
    "Raised when the lexer hits input it cannot tokenize."
    pass


class Lexer(object):
    """Tokenizer for C declarations (expects cpp-preprocessed text).

    The lexer walks the input line by line and exposes a one-token
    cursor: get_token() advances, unget_token() backs up exactly one
    token.  The current state is the tuple (tok, kind, lno, col),
    where ``kind`` is a cparse node for keywords/typedefs, a python
    int/float for numeric literals, or the token text itself for
    punctuation.
    """

    def __init__(self, s="", verbose=0, **kw):
        """Build the keyword tables and, if ``s`` is given, start lexing it.

        Any extra keyword arguments are copied onto the instance verbatim.
        """
        self.verbose = verbose
        self.lookup = {}  # a map for keywords and typedefs
        for t in "float double void char int".split():
            self.lookup[t] = host.BasicType(t)
        for t in "register signed unsigned short long const volatile inline".split():  # inline here ???
            self.lookup[t] = host.Qualifier(t)
        for t in "extern static auto".split():
            self.lookup[t] = host.StorageClass(t)
        self.lookup['typedef'] = host.Typedef()
        self.lookup['...'] = host.Ellipses()
        if s:
            self.lex(s)
        # stash any extra keyword args directly on the instance
        self.__dict__.update(kw)

    def lex(self, s):
        "Reset all cursor state and position on the first token of ``s``."
        self.stack = None
        self.lines = s.splitlines()
        self.set_state("", "", 0, 0)
        self.so_file = ""  # source filename from cpp '#' line markers
        self._newline()
        self.get_token()  # start

    def mktypedef(self, tok, node):
        "Register ``tok`` as a typedef name resolving to ``node``."
        if self.verbose:
            print("%s.mktypedef(%s,%s)" % (self, tok, node))
        self.lookup[tok] = node

    def rmtypedef(self, tok):
        " used in round trip testing "
        # only real nodes (typedefs) may be removed, never base keywords
        assert isinstance(self.lookup[tok], host.Node)  # existance
        del self.lookup[tok]

    def _get_kind(self, tok):
        """Classify ``tok``: a known keyword/typedef node, a fresh
        GCCBuiltin for __builtin* names, or None for plain identifiers."""
        try:
            return self.lookup[tok]
        except KeyError:
            if tok.startswith("__builtin"):
                node = host.GCCBuiltin(tok)
                self.lookup[tok] = node  # cache so repeat uses share one node
                return node
        return None

    def _newline(self):
        """Consume cpp line markers of the form  # lineno "file"  at the
        current position, remembering the filename in ``self.so_file``."""
        while self.lno < len(self.lines):
            line = self.lines[self.lno]
            if not line or line[0] != "#":
                break
            l = line.split('"')
            assert len(l) >= 2
            self.so_file = l[1]
            self.lno += 1

    def get_brace_token(self):
        "Scan forward to the next '{' or '}', ignoring everything else."
        self.push_state()
        tok, kind = "", ""
        while self.lno < len(self.lines):
            s = self.lines[self.lno]
            i = self.col
            while i < len(s):
                if s[i] in '{}':
                    tok = s[i]
                    kind = tok
                    self.col = i + 1
                    break
                i = i + 1
            if i == len(s):
                # nothing found on this line; advance to the next one
                assert tok == ""
                self.col = 0
                self.lno += 1
                self._newline()
            else:
                assert tok
                break
        self.set_state(tok, kind, self.lno, self.col)

    def get_token(self):
        """Advance to the next token, setting (tok, kind, lno, col).

        At end of input ``tok`` is "".  Raises LexError on an
        unterminated character constant; characters the lexer does not
        recognize are reported on stderr and skipped.
        """
        self.push_state()
        ident_chars1 = string.ascii_letters + string.digits + "_"
        tok, kind = "", ""
        while self.lno < len(self.lines):
            s = self.lines[self.lno]
            i = self.col
            while i < len(s):
                if s[i].isspace():
                    i = i + 1
                    continue
                if s[i].isalpha() or s[i] == '_':
                    # identifier
                    j = i + 1
                    while j < len(s) and s[j] in ident_chars1:
                        j = j + 1
                    tok = s[i:j]
                    self.col = j
                    kind = self._get_kind(tok)
                    break
                if s[i].isdigit() or \
                        (i + 1 < len(s) and s[i] in '+-.' and s[i + 1].isdigit()):
                    # number literal: decimal, hex (0x...), or float
                    is_float = s[i] == '.'
                    is_hex = s[i:i + 2] == '0x'
                    if is_hex:
                        i = i + 2  # skip the 0x prefix; int(tok, 16) below
                        assert s[i].isdigit() or s[i] in "abcdefABCDEF", self.err_string()
                    j = i + 1
                    while j < len(s):
                        if s[j].isdigit() or (is_hex and s[j] in "abcdefABCDEF"):
                            j = j + 1
                        elif s[j] == '.' and not is_float:
                            assert not is_hex
                            j = j + 1
                            is_float = 1
                        else:
                            break
                    tok = s[i:j]
                    self.col = j
                    if is_float:
                        kind = float(tok)
                    elif is_hex:
                        kind = int(tok, 16)
                    else:
                        kind = int(tok)
                    break
                if s[i:i + 3] == '...':
                    # ellipses
                    tok = s[i:i + 3]
                    kind = self._get_kind(tok)
                    self.col = i + 3
                    break
                if s[i] in '*/{}()[]:;,=+-~.<>|&':
                    # single-character punctuation / operator
                    tok = s[i]
                    kind = tok
                    self.col = i + 1
                    break
                if s[i] == "'":
                    # character constant, e.g. 'a'
                    j = i + 2
                    while j < len(s) and s[j] != "'":
                        j += 1
                    if j == len(s):
                        raise LexError(self.err_string() + "unterminated char constant")
                    tok = s[i:j + 1]
                    self.col = j + 1
                    kind = s[i:j + 1]
                    break
                # keep moving
                sys.stderr.write("lexer ignoring '%s' lno=%d\n" % (s[i], self.lno + 1))
                i = i + 1
            # end while i < len(s)
            if i == len(s):
                # nothing found, go to next line
                assert tok == ""
                self.col = 0
                self.lno += 1
                self._newline()
            else:
                # we got one
                assert tok
                break
        # end while self.lno < len(self.lines):
        self.set_state(tok, kind, self.lno, self.col)

    def err_string(self):
        "Return helpful error string :)"
        return self.lines[self.lno] + "\n" + " " * self.col + "^\n"

    def push_state(self):
        # remember the current state so one unget_token() can restore it
        self.stack = self.get_state()  # a short stack :)

    def unget_token(self):
        "Back up by exactly one token (single-level pushback)."
        assert self.stack is not None
        self.set_state(*self.stack)
        self.stack = None

    def set_state(self, tok, kind, lno, col):
        "Install (tok, kind, lno, col) as the current cursor state."
        if self.verbose:
            print("tok,kind,lno,col = ", (tok, kind, lno, col))
        self.tok = tok
        self.kind = kind
        self.lno = lno  # line
        self.col = col  # column

    def get_state(self):
        "Return the current (tok, kind, lno, col) tuple."
        return self.tok, self.kind, self.lno, self.col

    def get_file(self):
        "Return the source filename from the most recent cpp line marker."
        return self.so_file

###################################################################
#
###################################################################
#