apibuild.py revision d433046ae52b0488a9fad77d2045c0139b8cada8
1#!/usr/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import os, sys
11import string
12import glob
13
14#
15# C parser analysis code
16#
# File names mapped to the reason they are ignored.
# NOTE(review): the code consulting this table is outside this part of the
# file; presumably matching files are skipped by the builder -- confirm.
ignored_files = dict.fromkeys(
    ("trio", "trio.c", "trionan.c", "triostr.c"),
    "too many non standard macros")
ignored_files.update(dict.fromkeys(
    ("acconfig.h", "config.h"),
    "generated portability layer"))
ignored_files["libxml.h"] = "internal only"
ignored_files.update(dict.fromkeys(
    ("testOOM.c", "testOOMlib.h", "testOOMlib.c"),
    "out of memory tester"))
29
# Words removed from the token stream by CParser.token().  Each word maps
# to (n, reason): when the word is met as a 'name' token, the word and the
# n tokens following it are discarded; reason is documentation only.
ignored_words = dict([
    ("WINAPI", (0, "Windows keyword")),
    ("LIBXML_DLL_IMPORT", (0, "Special macro to flag external keywords")),
    ("__declspec", (3, "Windows keyword")),
    ("ATTRIBUTE_UNUSED", (0, "macro keyword")),
    ("LIBEXSLT_PUBLIC", (0, "macro keyword")),
    ("X_IN_Y", (5, "macro function builder")),
])
38
def escape(raw):
    """Return raw with the five XML special characters turned into entities.

    '&' is replaced first so that the ampersands introduced by the other
    replacements are not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
46
class identifier:
    """Record kept for one named identifier.

    name is the identifier itself.  module, type, info and extra are
    stored as given and are opaque to this class.  static is a flag,
    0 by default, changed through set_static().
    """
    def __init__(self, name, module=None, type=None, info=None, extra=None):
        self.name = name
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.static = 0

    def __repr__(self):
        """Return "<type> <name>:" followed by the optional parts that are set."""
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        return r

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_static(self, static):
        self.static = static

    def update(self, module, type = None, info = None, extra=None):
        """Fold a later description of the same identifier into this one.

        module and type are only filled in when still unset; info and
        extra replace the stored value whenever a non-None one is given.
        """
        if module is not None and self.module is None:
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
89
90
class index:
    """Set of identifiers, stored by name.

    Every identifier lives in self.identifiers and, once its type is
    known, also in the table kept for that type (functions, variables,
    includes, structs, enums, typedefs, macros).
    """

    # identifier type -> name of the attribute holding its table
    _tables = {
        "function": "functions",
        "functype": "functions",
        "variable": "variables",
        "include": "includes",
        "struct": "structs",
        "enum": "enums",
        "typedef": "typedefs",
        "macro": "macros",
    }

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}

    def add(self, name, module, static, type, info=None, extra=None):
        """Register name, or update the record already stored for it.

        Names starting with '__' are ignored and None is returned for
        them; otherwise the identifier record is returned.  An unknown
        type is reported on stdout and the record is only kept in
        self.identifiers.
        """
        if name[0:2] == '__':
            return None
        d = self.identifiers.get(name)
        if d is None:
            d = identifier(name, module, type, info, extra)
            self.identifiers[name] = d
        else:
            d.update(module, type, info, extra)

        if static == 1:
            d.set_static(1)

        if type is not None:
            table = self._tables.get(type)
            if table is None:
                # same output as the former: print "...type ", type
                print("Unable to register type " + " " + str(type))
            else:
                getattr(self, table)[name] = d
        return d

    def _merge_table(self, kind, mine, theirs, overrides_macro, shadowed_by):
        """Copy the entries of theirs missing from mine, report the others.

        kind is the word used in the "redeclared" message.  When
        overrides_macro is true a macro of the same name is dropped from
        self.macros first.  Names found in any table of shadowed_by are
        skipped silently.
        """
        for name in list(theirs.keys()):
            if overrides_macro and name in self.macros:
                del self.macros[name]
            shadowed = 0
            for table in shadowed_by:
                if name in table:
                    shadowed = 1
                    break
            if shadowed:
                continue
            if name in mine:
                print("%s %s from %s redeclared in %s" % (
                      kind, name, mine[name].module, theirs[name].module))
            else:
                mine[name] = theirs[name]
                self.identifiers[name] = theirs[name]

    def merge(self, idx):
        """Merge the tables of idx into this index.

        Entries already present are kept and a "redeclared" message is
        printed.  A macro may be used to override a function or variable
        definition: merging a function or variable removes the macro of
        the same name, and a macro is not merged when a variable,
        function or enum of that name is already known here.
        """
        self._merge_table("function", self.functions, idx.functions, 1, ())
        self._merge_table("variable", self.variables, idx.variables, 1, ())
        self._merge_table("struct", self.structs, idx.structs, 0, ())
        self._merge_table("typedef", self.typedefs, idx.typedefs, 0, ())
        self._merge_table("macro", self.macros, idx.macros, 0,
                          (self.variables, self.functions, self.enums))
        self._merge_table("enum", self.enums, idx.enums, 0, ())

    def merge_public(self, idx):
        """Update the functions known here with the records found in idx.

        Functions of idx that are unknown here are ignored silently.
        """
        for name in list(idx.functions.keys()):
            if name in self.functions:
                up = idx.functions[name]
                self.functions[name].update(None, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are not static."""
        count = len(dict)
        public = 0
        for entry in dict.values():
            if entry.static == 0:
                public = public + 1
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))

    def analyze(self):
        """Print the entry counts of the main tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
235
236class CLexer:
237     """A lexer for the C language, tokenize the input by reading and
238        analyzing it line by line"""
239     def __init__(self, input):
240         self.input = input
241	 self.tokens = []
242	 self.line = ""
243	 self.lineno = 0
244
245     def getline(self):
246         line = ''
247	 while line == '':
248	     line = self.input.readline()
249	     if not line:
250		 return None
251	     self.lineno = self.lineno + 1
252	     line = string.lstrip(line)
253	     line = string.rstrip(line)
254	     if line == '':
255	         continue
256	     while line[-1] == '\\':
257	         line = line[:-1]
258		 n = self.input.readline()
259		 self.lineno = self.lineno + 1
260		 n = string.lstrip(n)
261		 n = string.rstrip(n)
262		 if not n:
263		     break
264		 else:
265		     line = line + n
266         return line
267
268     def getlineno(self):
269         return self.lineno
270
271     def push(self, token):
272         self.tokens.insert(0, token);
273
274     def debug(self):
275         print "Last token: ", self.last
276	 print "Token queue: ", self.tokens
277	 print "Line %d end: " % (self.lineno), self.line
278
279     def token(self):
280         while self.tokens == []:
281	     if self.line == "":
282		 line = self.getline()
283	     else:
284	         line = self.line
285		 self.line = ""
286	     if line == None:
287	         return None
288
289	     if line[0] == '#':
290	         self.tokens = map((lambda x: ('preproc', x)),
291		                   string.split(line))
292		 break;
293	     l = len(line)
294	     if line[0] == '"' or line[0] == "'":
295	         end = line[0]
296	         line = line[1:]
297		 found = 0
298		 tok = ""
299		 while found == 0:
300		     i = 0
301		     l = len(line)
302		     while i < l:
303			 if line[i] == end:
304			     self.line = line[i+1:]
305			     line = line[:i]
306			     l = i
307			     found = 1
308			     break
309			 if line[i] == '\\':
310			     i = i + 1
311			 i = i + 1
312		     tok = tok + line
313		     if found == 0:
314		         line = self.getline()
315			 if line == None:
316			     return None
317		 self.last = ('string', tok)
318		 return self.last
319
320	     if l >= 2 and line[0] == '/' and line[1] == '*':
321	         line = line[2:]
322		 found = 0
323		 tok = ""
324		 while found == 0:
325		     i = 0
326		     l = len(line)
327		     while i < l:
328			 if line[i] == '*' and i+1 < l and line[i+1] == '/':
329			     self.line = line[i+2:]
330			     line = line[:i-1]
331			     l = i
332			     found = 1
333			     break
334			 i = i + 1
335	             if tok != "":
336		         tok = tok + "\n"
337		     tok = tok + line
338		     if found == 0:
339		         line = self.getline()
340			 if line == None:
341			     return None
342		 self.last = ('comment', tok)
343		 return self.last
344	     if l >= 2 and line[0] == '/' and line[1] == '/':
345	         line = line[2:]
346		 self.last = ('comment', line)
347		 return self.last
348	     i = 0
349	     while i < l:
350	         if line[i] == '/' and i+1 < l and line[i+1] == '/':
351		     self.line = line[i:]
352		     line = line[:i]
353		     break
354	         if line[i] == '/' and i+1 < l and line[i+1] == '*':
355		     self.line = line[i:]
356		     line = line[:i]
357		     break
358		 if line[i] == '"' or line[i] == "'":
359		     self.line = line[i:]
360		     line = line[:i]
361		     break
362		 i = i + 1
363	     l = len(line)
364	     i = 0
365	     while i < l:
366	         if line[i] == ' ' or line[i] == '\t':
367		     i = i + 1
368		     continue
369		 o = ord(line[i])
370		 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
371		    (o >= 48 and o <= 57):
372		     s = i
373		     while i < l:
374			 o = ord(line[i])
375			 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
376			    (o >= 48 and o <= 57) or string.find(
377			       " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
378			     i = i + 1
379			 else:
380			     break
381		     self.tokens.append(('name', line[s:i]))
382		     continue
383		 if string.find("(){}:;,[]", line[i]) != -1:
384#                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
385#		    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
386#		    line[i] == ',' or line[i] == '[' or line[i] == ']':
387		     self.tokens.append(('sep', line[i]))
388		     i = i + 1
389		     continue
390		 if string.find("+-*><=/%&!|.", line[i]) != -1:
391#                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
392#		    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
393#		    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
394#		    line[i] == '!' or line[i] == '|' or line[i] == '.':
395		     if line[i] == '.' and  i + 2 < l and \
396		        line[i+1] == '.' and line[i+2] == '.':
397			 self.tokens.append(('name', '...'))
398			 i = i + 3
399			 continue
400
401		     j = i + 1
402		     if j < l and (
403		        string.find("+-*><=/%&!|", line[j]) != -1):
404#		        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
405#			line[j] == '>' or line[j] == '<' or line[j] == '=' or \
406#			line[j] == '/' or line[j] == '%' or line[j] == '&' or \
407#			line[j] == '!' or line[j] == '|'):
408			 self.tokens.append(('op', line[i:j+1]))
409			 i = j + 1
410		     else:
411			 self.tokens.append(('op', line[i]))
412			 i = i + 1
413		     continue
414		 s = i
415		 while i < l:
416		     o = ord(line[i])
417		     if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
418		        (o >= 48 and o <= 57) or (
419		         string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
420#		         line[i] != ' ' and line[i] != '\t' and
421#			 line[i] != '(' and line[i] != ')' and
422#			 line[i] != '{'  and line[i] != '}' and
423#			 line[i] != ':' and line[i] != ';' and
424#			 line[i] != ',' and line[i] != '+' and
425#			 line[i] != '-' and line[i] != '*' and
426#			 line[i] != '/' and line[i] != '%' and
427#			 line[i] != '&' and line[i] != '!' and
428#			 line[i] != '|' and line[i] != '[' and
429#			 line[i] != ']' and line[i] != '=' and
430#			 line[i] != '*' and line[i] != '>' and
431#			 line[i] != '<'):
432			 i = i + 1
433		     else:
434		         break
435		 self.tokens.append(('name', line[s:i]))
436
437	 tok = self.tokens[0]
438	 self.tokens = self.tokens[1:]
439	 self.last = tok
440	 return tok
441
442class CParser:
443     """The C module parser"""
444     def __init__(self, filename, idx = None):
445         self.filename = filename
446	 if len(filename) > 2 and filename[-2:] == '.h':
447	     self.is_header = 1
448	 else:
449	     self.is_header = 0
450         self.input = open(filename)
451	 self.lexer = CLexer(self.input)
452	 if idx == None:
453	     self.index = index()
454	 else:
455	     self.index = idx
456	 self.top_comment = ""
457	 self.last_comment = ""
458	 self.comment = None
459
460     def lineno(self):
461         return self.lexer.getlineno()
462
463     def error(self, msg, token=-1):
464         print "Parse Error: " + msg
465	 if token != -1:
466	     print "Got token ", token
467	 self.lexer.debug()
468	 sys.exit(1)
469
470     def debug(self, msg, token=-1):
471         print "Debug: " + msg
472	 if token != -1:
473	     print "Got token ", token
474	 self.lexer.debug()
475
476     def parseComment(self, token):
477         if self.top_comment == "":
478	     self.top_comment = token[1]
479	 if self.comment == None or token[1][0] == '*':
480	     self.comment = token[1];
481	 else:
482	     self.comment = self.comment + token[1]
483	 token = self.lexer.token()
484	 return token
485
     #
     # Parse the comment block associated with a macro
     #
489     def parseMacroComment(self, name, quiet = 0):
490         if name[0:2] == '__':
491	     quiet = 1
492
493         args = []
494	 desc = ""
495
496         if self.comment == None:
497	     if not quiet:
498		 print "Missing comment for macro %s" % (name)
499	     return((args, desc))
500         if self.comment[0] != '*':
501	     if not quiet:
502		 print "Missing * in macro comment for %s" % (name)
503	     return((args, desc))
504	 lines = string.split(self.comment, '\n')
505	 if lines[0] == '*':
506	     del lines[0]
507	 if lines[0] != "* %s:" % (name):
508	     if not quiet:
509		 print "Misformatted macro comment for %s" % (name)
510		 print "  Expecting '* %s:' got '%s'" % (name, lines[0])
511	     return((args, desc))
512	 del lines[0]
513	 while lines[0] == '*':
514	     del lines[0]
515	 while len(lines) > 0 and lines[0][0:3] == '* @':
516	     l = lines[0][3:]
517	     try:
518	         (arg, desc) = string.split(l, ':', 1)
519		 desc=string.strip(desc)
520		 arg=string.strip(arg)
521             except:
522		 if not quiet:
523		     print "Misformatted macro comment for %s" % (name)
524		     print "  problem with '%s'" % (lines[0])
525		 del lines[0]
526		 continue
527	     del lines[0]
528	     l = string.strip(lines[0])
529	     while len(l) > 2 and l[0:3] != '* @':
530	         while l[0] == '*':
531		     l = l[1:]
532		 desc = desc + ' ' + string.strip(l)
533		 del lines[0]
534		 if len(lines) == 0:
535		     break
536		 l = lines[0]
537             args.append((arg, desc))
538	 while len(lines) > 0 and lines[0] == '*':
539	     del lines[0]
540	 desc = ""
541	 while len(lines) > 0:
542	     l = lines[0]
543	     while len(l) > 0 and l[0] == '*':
544	         l = l[1:]
545	     l = string.strip(l)
546	     desc = desc + " " + l
547	     del lines[0]
548
549	 desc = string.strip(desc)
550
551	 if quiet == 0:
552	     if desc == "":
553	         print "Macro comment for %s lack description of the macro" % (name)
554
555	 return((args, desc))
556
     #
     # Parse a comment block and merge the information found in the
     # parameter descriptions; finally return a block as complete
     # as possible
     #
562     def mergeFunctionComment(self, name, description, quiet = 0):
563         if name == 'main':
564	     quiet = 1
565         if name[0:2] == '__':
566	     quiet = 1
567
568	 (ret, args) = description
569	 desc = ""
570	 retdesc = ""
571
572         if self.comment == None:
573	     if not quiet:
574		 print "Missing comment for function %s" % (name)
575	     return(((ret[0], retdesc), args, desc))
576         if self.comment[0] != '*':
577	     if not quiet:
578		 print "Missing * in function comment for %s" % (name)
579	     return(((ret[0], retdesc), args, desc))
580	 lines = string.split(self.comment, '\n')
581	 if lines[0] == '*':
582	     del lines[0]
583	 if lines[0] != "* %s:" % (name):
584	     if not quiet:
585		 print "Misformatted function comment for %s" % (name)
586		 print "  Expecting '* %s:' got '%s'" % (name, lines[0])
587	     return(((ret[0], retdesc), args, desc))
588	 del lines[0]
589	 while lines[0] == '*':
590	     del lines[0]
591	 nbargs = len(args)
592	 while len(lines) > 0 and lines[0][0:3] == '* @':
593	     l = lines[0][3:]
594	     try:
595	         (arg, desc) = string.split(l, ':', 1)
596		 desc=string.strip(desc)
597		 arg=string.strip(arg)
598             except:
599		 if not quiet:
600		     print "Misformatted function comment for %s" % (name)
601		     print "  problem with '%s'" % (lines[0])
602		 del lines[0]
603		 continue
604	     del lines[0]
605	     l = string.strip(lines[0])
606	     while len(l) > 2 and l[0:3] != '* @':
607	         while l[0] == '*':
608		     l = l[1:]
609		 desc = desc + ' ' + string.strip(l)
610		 del lines[0]
611		 if len(lines) == 0:
612		     break
613		 l = lines[0]
614	     i = 0
615	     while i < nbargs:
616	         if args[i][1] == arg:
617		     args[i] = (args[i][0], arg, desc)
618		     break;
619		 i = i + 1
620	     if i >= nbargs:
621		 if not quiet:
622		     print "Uname to find arg %s from function comment for %s" % (
623		        arg, name)
624	 while len(lines) > 0 and lines[0] == '*':
625	     del lines[0]
626	 desc = ""
627	 while len(lines) > 0:
628	     l = lines[0]
629	     while len(l) > 0 and l[0] == '*':
630	         l = l[1:]
631	     l = string.strip(l)
632	     if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
633	         try:
634		     l = string.split(l, ' ', 1)[1]
635		 except:
636		     l = ""
637		 retdesc = string.strip(l)
638		 del lines[0]
639		 while len(lines) > 0:
640		     l = lines[0]
641		     while len(l) > 0 and l[0] == '*':
642			 l = l[1:]
643		     l = string.strip(l)
644		     retdesc = retdesc + " " + l
645		     del lines[0]
646	     else:
647	         desc = desc + " " + l
648		 del lines[0]
649
650	 retdesc = string.strip(retdesc)
651	 desc = string.strip(desc)
652
653	 if quiet == 0:
654	     #
655	     # report missing comments
656	     #
657	     i = 0
658	     while i < nbargs:
659	         if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
660		     print "Function comment for %s lack description of arg %s" % (name, args[i][1])
661		 i = i + 1
662	     if retdesc == "" and ret[0] != "void":
663		 print "Function comment for %s lack description of return value" % (name)
664	     if desc == "":
665	         print "Function comment for %s lack description of the function" % (name)
666
667
668	 return(((ret[0], retdesc), args, desc))
669
670     def parsePreproc(self, token):
671         name = token[1]
672	 if name == "#include":
673	     token = self.lexer.token()
674	     if token == None:
675	         return None
676	     if token[0] == 'preproc':
677		 self.index.add(token[1], self.filename, not self.is_header,
678		                "include")
679		 return self.lexer.token()
680	     return token
681	 if name == "#define":
682	     token = self.lexer.token()
683	     if token == None:
684	         return None
685	     if token[0] == 'preproc':
686	         # TODO macros with arguments
687		 name = token[1]
688	         lst = []
689		 token = self.lexer.token()
690		 while token != None and token[0] == 'preproc' and \
691		       token[1][0] != '#':
692		     lst.append(token[1])
693		     token = self.lexer.token()
694                 try:
695		     name = string.split(name, '(') [0]
696                 except:
697                     pass
698                 info = self.parseMacroComment(name, not self.is_header)
699		 self.index.add(name, self.filename, not self.is_header,
700		                "macro", info)
701		 return token
702	 token = self.lexer.token()
703	 while token != None and token[0] == 'preproc' and \
704	     token[1][0] != '#':
705	     token = self.lexer.token()
706	 return token
707
     #
     # Token acquisition on top of the lexer; it handles preprocessor
     # directives and comments internally since they are logically not
     # part of the program structure.
     #
713     def token(self):
714         global ignored_words
715
716         token = self.lexer.token()
717	 while token != None:
718	     if token[0] == 'comment':
719		 token = self.parseComment(token)
720		 continue
721	     elif token[0] == 'preproc':
722		 token = self.parsePreproc(token)
723		 continue
724	     elif token[0] == "name" and ignored_words.has_key(token[1]):
725	         (n, info) = ignored_words[token[1]]
726		 i = 0
727		 while i < n:
728		     token = self.lexer.token()
729		     i = i + 1
730		 token = self.lexer.token()
731		 continue
732	     else:
733	         #print "=> ", token
734	         return token
735	 return None
736
737     #
738     # Parse a typedef, it records the type and its name.
739     #
740     def parseTypedef(self, token):
741         if token == None:
742	     return None
743	 token = self.parseType(token)
744	 if token == None:
745	     self.error("parsing typedef")
746	     return None
747	 base_type = self.type
748	 type = base_type
749	 #self.debug("end typedef type", token)
750	 while token != None:
751	     if token[0] == "name":
752		 name = token[1]
753		 signature = self.signature
754		 if signature != None:
755		     type = string.split(type, '(')[0]
756		     d = self.mergeFunctionComment(name,
757			     ((type, None), signature), 1)
758		     self.index.add(name, self.filename, not self.is_header,
759				    "functype", d)
760		 else:
761		     if base_type == "struct":
762			 self.index.add(name, self.filename, not self.is_header,
763					"struct", type)
764			 base_type = "struct " + name
765	             else:
766			 self.index.add(name, self.filename, not self.is_header,
767		                    "typedef", type)
768		 token = self.token()
769	     else:
770		 self.error("parsing typedef: expecting a name")
771		 return token
772	     #self.debug("end typedef", token)
773	     if token != None and token[0] == 'sep' and token[1] == ',':
774	         type = base_type
775	         token = self.token()
776		 while token != None and token[0] == "op":
777		     type = type + token[1]
778		     token = self.token()
779	     elif token != None and token[0] == 'sep' and token[1] == ';':
780	         break;
781	     elif token != None and token[0] == 'name':
782	         type = base_type
783	         continue;
784	     else:
785		 self.error("parsing typedef: expecting ';'", token)
786		 return token
787	 token = self.token()
788	 return token
789
     #
     # Parse a C code block, used for functions; it parses up to and
     # including the balancing }
     #
794     def parseBlock(self, token):
795         while token != None:
796	     if token[0] == "sep" and token[1] == "{":
797	         token = self.token()
798		 token = self.parseBlock(token)
799	     elif token[0] == "sep" and token[1] == "}":
800	         self.comment = None
801	         token = self.token()
802		 return token
803	     else:
804	         token = self.token()
805	 return token
806
807     #
808     # Parse a C struct definition till the balancing }
809     #
810     def parseStruct(self, token):
811         fields = []
812	 #self.debug("start parseStruct", token)
813         while token != None:
814	     if token[0] == "sep" and token[1] == "{":
815	         token = self.token()
816		 token = self.parseTypeBlock(token)
817	     elif token[0] == "sep" and token[1] == "}":
818		 self.struct_fields = fields
819		 #self.debug("end parseStruct", token)
820		 #print fields
821	         token = self.token()
822		 return token
823	     else:
824	         base_type = self.type
825		 #self.debug("before parseType", token)
826		 token = self.parseType(token)
827		 #self.debug("after parseType", token)
828		 if token != None and token[0] == "name":
829		     fname = token[1]
830		     token = self.token()
831		     if token[0] == "sep" and token[1] == ";":
832		         self.comment = None
833		         token = self.token()
834			 fields.append((self.type, fname, self.comment))
835			 self.comment = None
836		     else:
837		         self.error("parseStruct: expecting ;", token)
838		 elif token != None and token[0] == "sep" and token[1] == "{":
839		     token = self.token()
840		     token = self.parseTypeBlock(token)
841		     if token != None and token[0] == "name":
842			 token = self.token()
843		     if token != None and token[0] == "sep" and token[1] == ";":
844			 token = self.token()
845		     else:
846		         self.error("parseStruct: expecting ;", token)
847		 else:
848		     self.error("parseStruct: name", token)
849		     token = self.token()
850		 self.type = base_type;
851         self.struct_fields = fields
852	 #self.debug("end parseStruct", token)
853	 #print fields
854	 return token
855
856     #
857     # Parse a C enum block, parse till the balancing }
858     #
859     def parseEnumBlock(self, token):
860         self.enums = []
861	 name = None
862	 self.comment = None
863	 comment = ""
864	 value = "0"
865         while token != None:
866	     if token[0] == "sep" and token[1] == "{":
867	         token = self.token()
868		 token = self.parseTypeBlock(token)
869	     elif token[0] == "sep" and token[1] == "}":
870		 if name != None:
871		     if self.comment != None:
872			 comment = self.comment
873			 self.comment = None
874		     self.enums.append((name, value, comment))
875	         token = self.token()
876		 return token
877	     elif token[0] == "name":
878		     if name != None:
879			 if self.comment != None:
880			     comment = string.strip(self.comment)
881			     self.comment = None
882			 self.enums.append((name, value, comment))
883		     name = token[1]
884		     comment = ""
885		     token = self.token()
886		     if token[0] == "op" and token[1][0] == "=":
887		         value = ""
888		         if len(token[1]) > 1:
889			     value = token[1][1:]
890		         token = self.token()
891		         while token[0] != "sep" or (token[1] != ',' and
892			       token[1] != '}'):
893			     value = value + token[1]
894			     token = self.token()
895		     else:
896		         try:
897			     value = "%d" % (int(value) + 1)
898			 except:
899			     print "Failed to compute value of enum %s" % (name)
900			     value=""
901		     if token[0] == "sep" and token[1] == ",":
902			 token = self.token()
903	     else:
904	         token = self.token()
905	 return token
906
     #
     # Parse a C definition block, used for structs; it parses up to
     # the balancing }
     #
911     def parseTypeBlock(self, token):
912         while token != None:
913	     if token[0] == "sep" and token[1] == "{":
914	         token = self.token()
915		 token = self.parseTypeBlock(token)
916	     elif token[0] == "sep" and token[1] == "}":
917	         token = self.token()
918		 return token
919	     else:
920	         token = self.token()
921	 return token
922
923     #
924     # Parse a type: the fact that the type name can either occur after
925     #    the definition or within the definition makes it a little harder
926     #    if inside, the name token is pushed back before returning
927     #
928     def parseType(self, token):
929         self.type = ""
930	 self.struct_fields = []
931         self.signature = None
932	 if token == None:
933	     return token
934
935	 while token[0] == "name" and (
936	       token[1] == "const" or token[1] == "unsigned"):
937	     if self.type == "":
938	         self.type = token[1]
939	     else:
940	         self.type = self.type + " " + token[1]
941	     token = self.token()
942
943         if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
944	     if self.type == "":
945	         self.type = token[1]
946	     else:
947	         self.type = self.type + " " + token[1]
948	     if token[0] == "name" and token[1] == "int":
949		 if self.type == "":
950		     self.type = tmp[1]
951		 else:
952		     self.type = self.type + " " + tmp[1]
953
954         elif token[0] == "name" and token[1] == "struct":
955	     if self.type == "":
956	         self.type = token[1]
957	     else:
958	         self.type = self.type + " " + token[1]
959	     token = self.token()
960	     nametok = None
961	     if token[0] == "name":
962	         nametok = token
963		 token = self.token()
964	     if token != None and token[0] == "sep" and token[1] == "{":
965		 token = self.token()
966		 token = self.parseStruct(token)
967	     elif token != None and token[0] == "op" and token[1] == "*":
968	         self.type = self.type + " " + nametok[1] + " *"
969		 token = self.token()
970		 while token != None and token[0] == "op" and token[1] == "*":
971		     self.type = self.type + " *"
972		     token = self.token()
973		 if token[0] == "name":
974		     nametok = token
975		     token = self.token()
976		 else:
977		     self.error("struct : expecting name", token)
978		     return token
979	     elif token != None and token[0] == "name" and nametok != None:
980	         self.type = self.type + " " + nametok[1]
981		 return token
982
983	     if nametok != None:
984		 self.lexer.push(token)
985		 token = nametok
986	     return token
987
988         elif token[0] == "name" and token[1] == "enum":
989	     if self.type == "":
990	         self.type = token[1]
991	     else:
992	         self.type = self.type + " " + token[1]
993	     self.enums = []
994	     token = self.token()
995	     if token != None and token[0] == "sep" and token[1] == "{":
996		 token = self.token()
997		 token = self.parseEnumBlock(token)
998	     else:
999		 self.error("parsing enum: expecting '{'", token)
1000	     enum_type = None
1001	     if token != None and token[0] != "name":
1002	         self.lexer.push(token)
1003	         token = ("name", "enum")
1004	     else:
1005	         enum_type = token[1]
1006	     for enum in self.enums:
1007		 self.index.add(enum[0], self.filename,
1008			        not self.is_header, "enum",
1009			        (enum[1], enum[2], enum_type))
1010	     return token
1011
1012	 elif token[0] == "name":
1013	     if self.type == "":
1014	         self.type = token[1]
1015	     else:
1016	         self.type = self.type + " " + token[1]
1017	 else:
1018	     self.error("parsing type %s: expecting a name" % (self.type),
1019	                token)
1020	     return token
1021	 token = self.token()
1022         while token != None and (token[0] == "op" or
1023	       token[0] == "name" and token[1] == "const"):
1024	     self.type = self.type + " " + token[1]
1025	     token = self.token()
1026
1027	 #
1028	 # if there is a parenthesis here, this means a function type
1029	 #
1030	 if token != None and token[0] == "sep" and token[1] == '(':
1031	     self.type = self.type + token[1]
1032	     token = self.token()
1033	     while token != None and token[0] == "op" and token[1] == '*':
1034	         self.type = self.type + token[1]
1035		 token = self.token()
1036	     if token == None or token[0] != "name" :
1037		 self.error("parsing function type, name expected", token);
1038	         return token
1039	     self.type = self.type + token[1]
1040	     nametok = token
1041	     token = self.token()
1042	     if token != None and token[0] == "sep" and token[1] == ')':
1043		 self.type = self.type + token[1]
1044		 token = self.token()
1045		 if token != None and token[0] == "sep" and token[1] == '(':
1046		     token = self.token()
1047		     type = self.type;
1048		     token = self.parseSignature(token);
1049		     self.type = type;
1050		 else:
1051		     self.error("parsing function type, '(' expected", token);
1052		     return token
1053	     else:
1054	         self.error("parsing function type, ')' expected", token);
1055		 return token
1056	     self.lexer.push(token)
1057	     token = nametok
1058	     return token
1059
1060         #
1061	 # do some lookahead for arrays
1062	 #
1063	 if token != None and token[0] == "name":
1064	     nametok = token
1065	     token = self.token()
1066	     if token != None and token[0] == "sep" and token[1] == '[':
1067	         self.type = self.type + nametok[1]
1068		 while token != None and token[0] == "sep" and token[1] == '[':
1069		     self.type = self.type + token[1]
1070		     token = self.token()
1071		     while token != None and token[0] != 'sep' and \
1072		           token[1] != ']' and token[1] != ';':
1073			 self.type = self.type + token[1]
1074			 token = self.token()
1075		 if token != None and token[0] == 'sep' and token[1] == ']':
1076		     self.type = self.type + token[1]
1077		     token = self.token()
1078		 else:
1079		     self.error("parsing array type, ']' expected", token);
1080		     return token
1081	     elif token != None and token[0] == "sep" and token[1] == ':':
1082	         # remove :12 in case it's a limited int size
1083		 token = self.token()
1084		 token = self.token()
1085	     self.lexer.push(token)
1086	     token = nametok
1087
1088	 return token
1089
1090     #
1091     # Parse a signature: '(' has been parsed and we scan the type definition
1092     #    up to the ')' included
1093     def parseSignature(self, token):
1094         signature = []
1095	 if token != None and token[0] == "sep" and token[1] == ')':
1096	     self.signature = []
1097	     token = self.token()
1098	     return token
1099	 while token != None:
1100	     token = self.parseType(token)
1101	     if token != None and token[0] == "name":
1102	         signature.append((self.type, token[1], None))
1103		 token = self.token()
1104	     elif token != None and token[0] == "sep" and token[1] == ',':
1105		 token = self.token()
1106		 continue
1107	     elif token != None and token[0] == "sep" and token[1] == ')':
1108	         # only the type was provided
1109		 if self.type == "...":
1110		     signature.append((self.type, "...", None))
1111		 else:
1112		     signature.append((self.type, None, None))
1113	     if token != None and token[0] == "sep":
1114	         if token[1] == ',':
1115		     token = self.token()
1116		     continue
1117		 elif token[1] == ')':
1118		     token = self.token()
1119		     break
1120	 self.signature = signature
1121	 return token
1122
1123     #
1124     # Parse a global definition, be it a type, variable or function
1125     # the extern "C" blocks are a bit nasty and require it to recurse.
1126     #
1127     def parseGlobal(self, token):
1128         static = 0
1129         if token[1] == 'extern':
1130	     token = self.token()
1131	     if token == None:
1132	         return token
1133	     if token[0] == 'string':
1134	         if token[1] == 'C':
1135		     token = self.token()
1136		     if token == None:
1137			 return token
1138		     if token[0] == 'sep' and token[1] == "{":
1139		         token = self.token()
1140#			 print 'Entering extern "C line ', self.lineno()
1141			 while token != None and (token[0] != 'sep' or
1142			       token[1] != "}"):
1143			     if token[0] == 'name':
1144				 token = self.parseGlobal(token)
1145			     else:
1146				 self.error(
1147				 "token %s %s unexpected at the top level" % (
1148					token[0], token[1]))
1149				 token = self.parseGlobal(token)
1150#			 print 'Exiting extern "C" line', self.lineno()
1151			 token = self.token()
1152			 return token
1153		 else:
1154		     return token
1155	 elif token[1] == 'static':
1156	     static = 1
1157	     token = self.token()
1158	     if token == None or  token[0] != 'name':
1159	         return token
1160
1161	 if token[1] == 'typedef':
1162	     token = self.token()
1163	     return self.parseTypedef(token)
1164	 else:
1165	     token = self.parseType(token)
1166	     type_orig = self.type
1167	 if token == None or token[0] != "name":
1168	     return token
1169	 type = type_orig
1170	 self.name = token[1]
1171	 token = self.token()
1172	 while token != None and (token[0] == "sep" or token[0] == "op"):
1173	     if token[0] == "sep":
1174		 if token[1] == "[":
1175		     type = type + token[1]
1176		     token = self.token()
1177		     while token != None and (token[0] != "sep" or \
1178		           token[1] != ";"):
1179			 type = type + token[1]
1180			 token = self.token()
1181
1182	     if token != None and token[0] == "op" and token[1] == "=":
1183		 #
1184		 # Skip the initialization of the variable
1185		 #
1186		 token = self.token()
1187		 if token[0] == 'sep' and token[1] == '{':
1188		     token = self.token()
1189		     token = self.parseBlock(token)
1190		 else:
1191		     self.comment = None
1192		     while token != None and (token[0] != "sep" or \
1193			   (token[1] != ';' and token[1] != ',')):
1194			     token = self.token()
1195		 self.comment = None
1196		 if token == None or token[0] != "sep" or (token[1] != ';' and
1197		    token[1] != ','):
1198		     self.error("missing ';' or ',' after value")
1199
1200	     if token != None and token[0] == "sep":
1201		 if token[1] == ";":
1202		     self.comment = None
1203		     token = self.token()
1204		     if type == "struct":
1205		         self.index.add(self.name, self.filename,
1206			      not self.is_header, "struct", self.struct_fields)
1207		     else:
1208			 self.index.add(self.name, self.filename,
1209			      not self.is_header, "variable", type)
1210		     break
1211		 elif token[1] == "(":
1212		     token = self.token()
1213		     token = self.parseSignature(token)
1214		     if token == None:
1215			 return None
1216		     if token[0] == "sep" and token[1] == ";":
1217		         d = self.mergeFunctionComment(self.name,
1218				 ((type, None), self.signature), 1)
1219			 self.index.add(self.name, self.filename, static,
1220			                "function", d)
1221			 token = self.token()
1222		     elif token[0] == "sep" and token[1] == "{":
1223		         d = self.mergeFunctionComment(self.name,
1224				 ((type, None), self.signature), static)
1225			 self.index.add(self.name, self.filename, static,
1226			                "function", d)
1227			 token = self.token()
1228			 token = self.parseBlock(token);
1229		 elif token[1] == ',':
1230		     self.comment = None
1231		     self.index.add(self.name, self.filename, static,
1232		                    "variable", type)
1233		     type = type_orig
1234		     token = self.token()
1235		     while token != None and token[0] == "sep":
1236		         type = type + token[1]
1237			 token = self.token()
1238		     if token != None and token[0] == "name":
1239		         self.name = token[1]
1240			 token = self.token()
1241		 else:
1242		     break
1243
1244	 return token
1245
1246     def parse(self):
1247         print "Parsing %s" % (self.filename)
1248         token = self.token()
1249	 while token != None:
1250             if token[0] == 'name':
1251	         token = self.parseGlobal(token)
1252             else:
1253	         self.error("token %s %s unexpected at the top level" % (
1254		        token[0], token[1]))
1255		 token = self.parseGlobal(token)
1256		 return
1257         return self.index
1258
1259
class docBuilder:
     """A documentation builder.

     Collects the C modules (*.c) and headers (*.h) found in a list of
     directories, parses each of them with CParser, merges the resulting
     indexes into self.idx and serializes the whole as an XML API
     description ("<name>-api.xml" by default).
     """
     def __init__(self, name, directories=None, excludes=None):
         """Set up a builder for project `name`.

         directories: directories to scan, ['.'] when omitted.
         excludes: substrings of file paths to skip, on top of the
                   module-level ignored_files entries.
         """
         # None replaces the former mutable default arguments
         if directories is None:
             directories = ['.']
         if excludes is None:
             excludes = []
         self.name = name
         self.directories = directories
         self.excludes = list(excludes) + list(ignored_files.keys())
         self.modules = {}
         self.headers = {}
         self.idx = index()

     def analyze(self):
         """Print a short summary and run the analysis of the merged index."""
         print("Project %s : %d headers, %d modules" % (
               self.name, len(self.headers), len(self.modules)))
         self.idx.analyze()

     def scanHeaders(self):
         """Parse every collected header and merge its index."""
         for header in list(self.headers.keys()):
             parser = CParser(header)
             idx = parser.parse()
             self.headers[header] = idx
             self.idx.merge(idx)

     def scanModules(self):
         """Parse every collected module and merge its public part."""
         for module in list(self.modules.keys()):
             parser = CParser(module)
             idx = parser.parse()
             # idx.analyze()
             self.modules[module] = idx
             self.idx.merge_public(idx)

     def _collect(self, directory, pattern, table):
         """Register in `table` the files of `directory` matching
         `pattern` whose path contains none of the excluded substrings."""
         for path in glob.glob(directory + pattern):
             for excl in self.excludes:
                 if path.find(excl) != -1:
                     break
             else:
                 table[path] = None

     def scan(self):
         """Collect the modules and headers of all directories, then
         parse the headers followed by the modules."""
         for directory in self.directories:
             self._collect(directory, "/*.c", self.modules)
             self._collect(directory, "/*.h", self.headers)
         self.scanHeaders()
         self.scanModules()

     def modulename_file(self, file):
         """Return the base name of `file`, without a trailing '.h'."""
         module = os.path.basename(file)
         if module[-2:] == '.h':
             module = module[:-2]
         return module

     def _sorted_names(self, table):
         """Return the keys of `table` as a sorted list."""
         names = list(table.keys())
         names.sort()
         return names

     def serialize_enum(self, output, name):
         """Write the <enum> element describing enum value `name`."""
         ident = self.idx.enums[name]
         output.write("    <enum name='%s' file='%s'" % (name,
                      self.modulename_file(ident.module)))
         if ident.info is not None:
             info = ident.info
             if info[0] is not None and info[0] != '':
                 # the value is C source text and may hold '<' or '&'
                 # (e.g. "1<<0"), which must not appear raw in an
                 # attribute value
                 output.write(" value='%s'" % escape("%s" % (info[0],)))
             if info[2] is not None and info[2] != '':
                 output.write(" type='%s'" % info[2])
             if info[1] is not None and info[1] != '':
                 output.write(" info='%s'" % escape(info[1]))
         output.write("/>\n")

     def serialize_macro(self, output, name):
         """Write the <macro> element describing macro `name`."""
         ident = self.idx.macros[name]
         output.write("    <macro name='%s' file='%s'>\n" % (name,
                      self.modulename_file(ident.module)))
         if ident.info is not None:
             try:
                 (args, desc) = ident.info
                 if desc is not None and desc != "":
                     output.write("      <info>%s</info>\n" % (escape(desc)))
                 for arg in args:
                     (arg_name, arg_desc) = arg
                     if arg_desc is not None and arg_desc != "":
                         output.write("      <arg name='%s' info='%s'/>\n" % (
                                      arg_name, escape(arg_desc)))
                     else:
                         output.write("      <arg name='%s'/>\n" % (arg_name))
             except Exception:
                 # best effort: info without the expected (args, desc)
                 # shape only yields a less detailed element
                 pass
         output.write("    </macro>\n")

     def serialize_typedef(self, output, name):
         """Write a <struct> element when typedef `name` aliases
         'struct xxx' (with its fields when they are known), a
         <typedef> element otherwise."""
         ident = self.idx.typedefs[name]
         if ident.info[0:7] == 'struct ':
             output.write("    <struct name='%s' file='%s' type='%s'" % (
                      name, self.modulename_file(ident.module), ident.info))
             name = ident.info[7:]
             if name in self.idx.structs and \
                isinstance(self.idx.structs[name].info, (tuple, list)):
                 output.write(">\n")
                 try:
                     for field in self.idx.structs[name].info:
                         desc = field[2]
                         if desc is None:
                             desc = ''
                         else:
                             desc = escape(desc)
                         output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                 except Exception:
                     print("Failed to serialize struct %s" % (name))
                 output.write("    </struct>\n")
             else:
                 output.write("/>\n")
         else:
             output.write("    <typedef name='%s' file='%s' type='%s'/>\n" % (
                      name, self.modulename_file(ident.module), ident.info))

     def serialize_variable(self, output, name):
         """Write the <variable> element describing variable `name`."""
         ident = self.idx.variables[name]
         if ident.info is not None:
             output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
                     name, self.modulename_file(ident.module), ident.info))
         else:
             output.write("    <variable name='%s' file='%s'/>\n" % (
                     name, self.modulename_file(ident.module)))

     def serialize_function(self, output, name):
         """Write the element describing function `name`; the tag is the
         identifier's own type.  When the info cannot be written a
         message is printed and the element is still closed."""
         ident = self.idx.functions[name]
         output.write("    <%s name='%s' file='%s'>\n" % (ident.type, name,
                      self.modulename_file(ident.module)))
         try:
             (ret, params, desc) = ident.info
             output.write("      <info>%s</info>\n" % (escape(desc)))
             if ret[0] is not None:
                 if ret[0] == "void":
                     output.write("      <return type='void'/>\n")
                 else:
                     output.write("      <return type='%s' info='%s'/>\n" % (
                              ret[0], escape(ret[1])))
             for param in params:
                 if param[0] == 'void':
                     continue
                 if param[2] is None:
                     output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                 else:
                     output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
         except Exception:
             print("Failed to save function %s info:  %s" % (
                   name, repr(ident.info)))
         output.write("    </%s>\n" % (ident.type))

     def serialize_exports(self, output, file):
         """Write the <file> element listing, sorted, every symbol found
         in header `file`."""
         module = self.modulename_file(file)
         output.write("    <file name='%s'>\n" % (module))
         exported = self.headers[file]
         ids = []
         for table in (exported.functions, exported.variables,
                       exported.macros, exported.typedefs,
                       exported.structs, exported.enums):
             ids.extend(table.keys())
         ids.sort()
         for symbol in ids:
             output.write("     <exports symbol='%s'/>\n" % (symbol))
         output.write("    </file>\n")

     def serialize(self, filename = None):
         """Save the XML description in `filename`, "<name>-api.xml"
         when omitted."""
         if filename is None:
             filename = "%s-api.xml" % self.name
         print("Saving XML description %s" % (filename))
         output = open(filename, "w")
         # close the file even when one of the serializers raises
         try:
             output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
             output.write("<api name='%s'>\n" % self.name)
             output.write("  <files>\n")
             for header in self.headers.keys():
                 self.serialize_exports(output, header)
             output.write("  </files>\n")
             output.write("  <symbols>\n")
             for macro in self._sorted_names(self.idx.macros):
                 self.serialize_macro(output, macro)
             for enum in self._sorted_names(self.idx.enums):
                 self.serialize_enum(output, enum)
             for typedef in self._sorted_names(self.idx.typedefs):
                 self.serialize_typedef(output, typedef)
             for variable in self._sorted_names(self.idx.variables):
                 self.serialize_variable(output, variable)
             for function in self._sorted_names(self.idx.functions):
                 self.serialize_function(output, function)
             output.write("  </symbols>\n")
             output.write("</api>\n")
         finally:
             output.close()
1460
1461
def rebuild():
    """Regenerate the API description of the surrounding project.

    The project is guessed from marker files relative to the current
    directory: libxml2 first, then libxslt.  When libexslt sources are
    present as well, a second description is generated for them.

    Returns the builder of the main project, or None when no known
    project is found.
    """
    projects = (
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for marker, banner, project, dirs, skipped in projects:
        if glob.glob(marker):
            print(banner)
            builder = docBuilder(project, dirs, skipped)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()

    # libexslt is described separately
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
1484
1485#
1486# for debugging the parser
1487#
def parse(filename):
    """Parse the single C file `filename` and return what CParser.parse()
    yields for it; meant for debugging the parser."""
    return CParser(filename).parse()
1492
# Script entry point: rebuild the API description when the file is run
# directly rather than imported.
if __name__ == "__main__":
    rebuild()
1495