apibuild.py revision 99dd7636a44208a14bc71614120f8ef4b8123016
1#!/usr/bin/python -u
2#
3# This is the API builder, it parses the C sources and build the
4# API formal description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import os, sys
11import string
12import glob
13
# Tracing switches.
# debug: when non-zero the parser prints every preprocessor line and every
# token it hands out.
debug = 0
# debugsym: name of one symbol whose definition and registration should be
# printed (for example 'ignorableWhitespaceSAXFunc'); None disables it.
debugsym = None
17
18#
19# C parser analysis code
20#
# File names that must be left alone, mapped to the reason why.
ignored_files = {
    # trio: portable printf replacement
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    # generated or internal headers
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "elfgcchack.h": "not a normal header",
    # test programs
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
    "testapi.c": "generated regression tests",
    "tst.c": "not part of the library",
    "testdso.c": "test for dynamid shared libraries",
    # work in progress
    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
}
39
# Names the parser drops from the token stream.  Each value is a pair
# (n, reason): n is the number of tokens following the name that are
# dropped together with it, reason is a human readable explanation.
ignored_words = {
    # Windows calling convention / linkage keywords
    "WINAPI": (0, "Windows keyword"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    # export markers for variables
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    # export markers for functions
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    # calling convention markers
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    # miscellaneous macros
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
}
58
def escape(raw):
    """Return raw with the five XML special characters escaped.

    '&', '<', '>', "'" and '"' are replaced by their predefined entity
    references.  The ampersand is handled first so that the '&' introduced
    by the other replacements is not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
66
def uniq(items):
    """Return the distinct elements of items (as the keys of a dictionary).

    The elements must be hashable; no particular order is promised.
    """
    seen = {}
    for entry in items:
        seen.setdefault(entry, 1)
    return seen.keys()
72
class identifier:
    """One symbol found in the sources and what is known about it.

    Attributes:
      name          the symbol name
      header        header associated with the symbol, or None
      module        module associated with the symbol, or None
      type          kind of symbol as a string ("function", "macro", ...)
      lineno        line number recorded for the symbol
      info, extra   opaque payloads supplied by the caller
      static        0 until changed with set_static()
      conditionals  private copy of the list of conditions guarding the
                    symbol, or None when the list is empty or missing
    """

    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        """Return 'type name:' followed by the optional fields that are set."""
        parts = ["%s %s:" % (self.type, self.name)]
        if self.static:
            parts.append(" static")
        if self.module is not None:
            parts.append(" from %s" % (self.module,))
        for payload in (self.info, self.extra, self.conditionals):
            if payload is not None:
                parts.append(" " + repr(payload))
        return "".join(parts)

    def set_header(self, header):
        self.header = header

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_lineno(self, lineno):
        self.lineno = lineno

    def set_static(self, static):
        self.static = static

    def set_conditionals(self, conditionals):
        """Store a copy of conditionals, or None if it is empty or None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name

    def get_header(self):
        # Used to return self.module, which made the header unreachable
        # through the accessor.
        return self.header

    def get_module(self):
        return self.module

    def get_type(self):
        return self.type

    def get_info(self):
        return self.info

    def get_lineno(self):
        return self.lineno

    def get_extra(self):
        return self.extra

    def get_static(self):
        return self.static

    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge newly discovered information into this identifier.

        header and type only fill in a value that is still missing; module
        is also replaced when it currently equals the header.  info, extra
        and conditionals overwrite the stored value whenever they are not
        None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # Previously stored `module` here; the header slot must receive
            # the header argument.
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
163
class index:
    """Symbol table: every identifier found, plus one table per symbol kind."""

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _new_identifier(self, name, header, module, static, type, lineno,
                        info, extra, conditionals):
        """Create the identifier for name and store it in self.identifiers.

        An existing entry with the same name is replaced.  The previous
        code first tried identifier.update() on the existing entry, but
        passed it one argument too many (lineno), so the call always raised
        TypeError and a bare except fell back to creating a new identifier.
        The replacement is now explicit and the results are unchanged.
        NOTE(review): if merging into the existing entry was the intent,
        this is the place to do it.
        """
        d = identifier(name, header, module, type, lineno, info, extra,
                       conditionals)
        self.identifiers[name] = d
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name; returns the identifier created.

        Names starting with '__' are ignored and None is returned.
        """
        if name[0:2] == '__':
            return None
        d = self._new_identifier(name, header, module, static, type, lineno,
                                 info, extra, conditionals)
        if type is not None:
            self.references[name] = d
        if name == debugsym:
            print("New ref: %s" % (d,))
        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of name in the table matching its type.

        Names starting with '__' are ignored and None is returned.  A type
        that matches no table is reported on stdout; the identifier is
        still kept in self.identifiers and returned.
        """
        if name[0:2] == '__':
            return None
        d = self._new_identifier(name, header, module, static, type, lineno,
                                 info, extra, conditionals)
        if type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in tables:
                tables[type][name] = d
            else:
                print("Unable to register type  %s" % (type,))
        if name == debugsym:
            print("New symbol: %s" % (d,))
        return d

    def _merge_table(self, kind, mine, theirs, overrides_macros, hidden_by):
        """Copy the entries of theirs that are not yet in mine.

        kind is the word used in the "redeclared" message.  When
        overrides_macros is true, a macro of the same name is removed from
        self.macros first.  Entries whose name is present in one of the
        hidden_by tables are skipped silently.
        """
        for sym in list(theirs.keys()):
            if overrides_macros and sym in self.macros:
                del self.macros[sym]
            hidden = 0
            for table in hidden_by:
                if sym in table:
                    hidden = 1
                    break
            if hidden:
                continue
            if sym in mine:
                print("%s %s from %s redeclared in %s" % (
                    kind, sym, mine[sym].header, theirs[sym].header))
            else:
                mine[sym] = theirs[sym]
                self.identifiers[sym] = theirs[sym]

    def merge(self, idx):
        """Merge the symbol tables of idx into this index.

        Symbols already present are reported as redeclared and left
        untouched.  A macro might be used to override a function or
        variable definition, so functions and variables remove a macro of
        the same name, and macros named like an existing variable, function
        or enum are not merged.
        """
        self._merge_table("function", self.functions, idx.functions, 1, ())
        self._merge_table("variable", self.variables, idx.variables, 1, ())
        self._merge_table("struct", self.structs, idx.structs, 0, ())
        self._merge_table("typedef", self.typedefs, idx.typedefs, 0, ())
        self._merge_table("macro", self.macros, idx.macros, 0,
                          (self.variables, self.functions, self.enums))
        self._merge_table("enum", self.enums, idx.enums, 0, ())

    def merge_public(self, idx):
        """Complete the functions of this index with the data found in idx.

        Only functions known to both indexes are considered.  A difference
        between the two sets of conditionals is reported on stdout, then
        module, type, info and extra are pushed through update().
        """
        for sym in list(idx.functions.keys()):
            if sym not in self.functions:
                # Functions missing from this index are silently ignored.
                continue
            theirs = idx.functions[sym]
            mine = self.functions[sym]
            # check that function condition agrees with header
            if theirs.conditionals != mine.conditionals:
                print("Header condition differs from Function for %s:" % (sym,))
                print("  H: %s" % (mine.conditionals,))
                print("  C: %s" % (theirs.conditionals,))
            mine.update(None, theirs.module, theirs.type, theirs.info,
                        theirs.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries dict holds and how many are not static."""
        count = len(dict)
        public = 0
        for entry in dict.values():
            if entry.static == 0:
                public = public + 1
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics of the main symbol tables."""
        for label, table in (("functions", self.functions),
                             ("variables", self.variables),
                             ("structs", self.structs),
                             ("typedefs", self.typedefs),
                             ("macros", self.macros)):
            self.analyze_dict(label, table)
342
343class CLexer:
344    """A lexer for the C language, tokenize the input by reading and
345       analyzing it line by line"""
346    def __init__(self, input):
347        self.input = input
348	self.tokens = []
349	self.line = ""
350	self.lineno = 0
351
352    def getline(self):
353        line = ''
354	while line == '':
355	    line = self.input.readline()
356	    if not line:
357		return None
358	    self.lineno = self.lineno + 1
359	    line = string.lstrip(line)
360	    line = string.rstrip(line)
361	    if line == '':
362	        continue
363	    while line[-1] == '\\':
364	        line = line[:-1]
365		n = self.input.readline()
366		self.lineno = self.lineno + 1
367		n = string.lstrip(n)
368		n = string.rstrip(n)
369		if not n:
370		    break
371		else:
372		    line = line + n
373        return line
374
375    def getlineno(self):
376        return self.lineno
377
378    def push(self, token):
379        self.tokens.insert(0, token);
380
381    def debug(self):
382        print "Last token: ", self.last
383	print "Token queue: ", self.tokens
384	print "Line %d end: " % (self.lineno), self.line
385
386    def token(self):
387        while self.tokens == []:
388	    if self.line == "":
389		line = self.getline()
390	    else:
391	        line = self.line
392		self.line = ""
393	    if line == None:
394	        return None
395
396	    if line[0] == '#':
397	        self.tokens = map((lambda x: ('preproc', x)),
398		                  string.split(line))
399		break;
400	    l = len(line)
401	    if line[0] == '"' or line[0] == "'":
402	        end = line[0]
403	        line = line[1:]
404		found = 0
405		tok = ""
406		while found == 0:
407		    i = 0
408		    l = len(line)
409		    while i < l:
410			if line[i] == end:
411			    self.line = line[i+1:]
412			    line = line[:i]
413			    l = i
414			    found = 1
415			    break
416			if line[i] == '\\':
417			    i = i + 1
418			i = i + 1
419		    tok = tok + line
420		    if found == 0:
421		        line = self.getline()
422			if line == None:
423			    return None
424		self.last = ('string', tok)
425		return self.last
426
427	    if l >= 2 and line[0] == '/' and line[1] == '*':
428	        line = line[2:]
429		found = 0
430		tok = ""
431		while found == 0:
432		    i = 0
433		    l = len(line)
434		    while i < l:
435			if line[i] == '*' and i+1 < l and line[i+1] == '/':
436			    self.line = line[i+2:]
437			    line = line[:i-1]
438			    l = i
439			    found = 1
440			    break
441			i = i + 1
442	            if tok != "":
443		        tok = tok + "\n"
444		    tok = tok + line
445		    if found == 0:
446		        line = self.getline()
447			if line == None:
448			    return None
449		self.last = ('comment', tok)
450		return self.last
451	    if l >= 2 and line[0] == '/' and line[1] == '/':
452	        line = line[2:]
453		self.last = ('comment', line)
454		return self.last
455	    i = 0
456	    while i < l:
457	        if line[i] == '/' and i+1 < l and line[i+1] == '/':
458		    self.line = line[i:]
459		    line = line[:i]
460		    break
461	        if line[i] == '/' and i+1 < l and line[i+1] == '*':
462		    self.line = line[i:]
463		    line = line[:i]
464		    break
465		if line[i] == '"' or line[i] == "'":
466		    self.line = line[i:]
467		    line = line[:i]
468		    break
469		i = i + 1
470	    l = len(line)
471	    i = 0
472	    while i < l:
473	        if line[i] == ' ' or line[i] == '\t':
474		    i = i + 1
475		    continue
476		o = ord(line[i])
477		if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
478		   (o >= 48 and o <= 57):
479		    s = i
480		    while i < l:
481			o = ord(line[i])
482			if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
483			   (o >= 48 and o <= 57) or string.find(
484			       " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
485			    i = i + 1
486			else:
487			    break
488		    self.tokens.append(('name', line[s:i]))
489		    continue
490		if string.find("(){}:;,[]", line[i]) != -1:
491#                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
492#		    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
493#		    line[i] == ',' or line[i] == '[' or line[i] == ']':
494		    self.tokens.append(('sep', line[i]))
495		    i = i + 1
496		    continue
497		if string.find("+-*><=/%&!|.", line[i]) != -1:
498#                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
499#		    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
500#		    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
501#		    line[i] == '!' or line[i] == '|' or line[i] == '.':
502		    if line[i] == '.' and  i + 2 < l and \
503		       line[i+1] == '.' and line[i+2] == '.':
504			self.tokens.append(('name', '...'))
505			i = i + 3
506			continue
507
508		    j = i + 1
509		    if j < l and (
510		       string.find("+-*><=/%&!|", line[j]) != -1):
511#		        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
512#			line[j] == '>' or line[j] == '<' or line[j] == '=' or \
513#			line[j] == '/' or line[j] == '%' or line[j] == '&' or \
514#			line[j] == '!' or line[j] == '|'):
515			self.tokens.append(('op', line[i:j+1]))
516			i = j + 1
517		    else:
518			self.tokens.append(('op', line[i]))
519			i = i + 1
520		    continue
521		s = i
522		while i < l:
523		    o = ord(line[i])
524		    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
525		       (o >= 48 and o <= 57) or (
526		        string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
527#		         line[i] != ' ' and line[i] != '\t' and
528#			 line[i] != '(' and line[i] != ')' and
529#			 line[i] != '{'  and line[i] != '}' and
530#			 line[i] != ':' and line[i] != ';' and
531#			 line[i] != ',' and line[i] != '+' and
532#			 line[i] != '-' and line[i] != '*' and
533#			 line[i] != '/' and line[i] != '%' and
534#			 line[i] != '&' and line[i] != '!' and
535#			 line[i] != '|' and line[i] != '[' and
536#			 line[i] != ']' and line[i] != '=' and
537#			 line[i] != '*' and line[i] != '>' and
538#			 line[i] != '<'):
539			i = i + 1
540		    else:
541		        break
542		self.tokens.append(('name', line[s:i]))
543
544	tok = self.tokens[0]
545	self.tokens = self.tokens[1:]
546	self.last = tok
547	return tok
548
549class CParser:
550    """The C module parser"""
551    def __init__(self, filename, idx = None):
552        self.filename = filename
553	if len(filename) > 2 and filename[-2:] == '.h':
554	    self.is_header = 1
555	else:
556	    self.is_header = 0
557        self.input = open(filename)
558	self.lexer = CLexer(self.input)
559	if idx == None:
560	    self.index = index()
561	else:
562	    self.index = idx
563	self.top_comment = ""
564	self.last_comment = ""
565	self.comment = None
566	self.collect_ref = 0
567	self.no_error = 0
568	self.conditionals = []
569	self.defines = []
570
571    def collect_references(self):
572        self.collect_ref = 1
573
574    def stop_error(self):
575        self.no_error = 1
576
577    def start_error(self):
578        self.no_error = 0
579
580    def lineno(self):
581        return self.lexer.getlineno()
582
583    def index_add(self, name, module, static, type, info=None, extra = None):
584	if self.is_header == 1:
585	    self.index.add(name, module, module, static, type, self.lineno(),
586			   info, extra, self.conditionals)
587	else:
588	    self.index.add(name, None, module, static, type, self.lineno(),
589			   info, extra, self.conditionals)
590
591    def index_add_ref(self, name, module, static, type, info=None,
592                      extra = None):
593	if self.is_header == 1:
594	    self.index.add_ref(name, module, module, static, type,
595	                       self.lineno(), info, extra, self.conditionals)
596	else:
597	    self.index.add_ref(name, None, module, static, type, self.lineno(),
598			       info, extra, self.conditionals)
599
600    def warning(self, msg):
601        if self.no_error:
602	    return
603	print msg
604
605    def error(self, msg, token=-1):
606        if self.no_error:
607	    return
608
609        print "Parse Error: " + msg
610	if token != -1:
611	    print "Got token ", token
612	self.lexer.debug()
613	sys.exit(1)
614
615    def debug(self, msg, token=-1):
616        print "Debug: " + msg
617	if token != -1:
618	    print "Got token ", token
619	self.lexer.debug()
620
621    def parseTopComment(self, comment):
622	res = {}
623	lines = string.split(comment, "\n")
624	item = None
625	for line in lines:
626	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
627		line = line[1:]
628	    while line != "" and line[0] == '*':
629		line = line[1:]
630	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
631		line = line[1:]
632	    try:
633		(it, line) = string.split(line, ":", 1)
634		item = it
635		while line != "" and (line[0] == ' ' or line[0] == '\t'):
636		    line = line[1:]
637		if res.has_key(item):
638		    res[item] = res[item] + " " + line
639		else:
640		    res[item] = line
641	    except:
642		if item != None:
643		    if res.has_key(item):
644			res[item] = res[item] + " " + line
645		    else:
646			res[item] = line
647	self.index.info = res
648
649    def parseComment(self, token):
650        if self.top_comment == "":
651	    self.top_comment = token[1]
652	if self.comment == None or token[1][0] == '*':
653	    self.comment = token[1];
654	else:
655	    self.comment = self.comment + token[1]
656	token = self.lexer.token()
657
658        if string.find(self.comment, "DOC_DISABLE") != -1:
659	    self.stop_error()
660
661        if string.find(self.comment, "DOC_ENABLE") != -1:
662	    self.start_error()
663
664	return token
665
666     #
667     # Parse a comment block associate to a macro
668     #
669    def parseMacroComment(self, name, quiet = 0):
670        if name[0:2] == '__':
671	    quiet = 1
672
673        args = []
674	desc = ""
675
676        if self.comment == None:
677	    if not quiet:
678		self.warning("Missing comment for macro %s" % (name))
679	    return((args, desc))
680        if self.comment[0] != '*':
681	    if not quiet:
682		self.warning("Missing * in macro comment for %s" % (name))
683	    return((args, desc))
684	lines = string.split(self.comment, '\n')
685	if lines[0] == '*':
686	    del lines[0]
687	if lines[0] != "* %s:" % (name):
688	    if not quiet:
689		self.warning("Misformatted macro comment for %s" % (name))
690		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
691	    return((args, desc))
692	del lines[0]
693	while lines[0] == '*':
694	    del lines[0]
695	while len(lines) > 0 and lines[0][0:3] == '* @':
696	    l = lines[0][3:]
697	    try:
698	        (arg, desc) = string.split(l, ':', 1)
699		desc=string.strip(desc)
700		arg=string.strip(arg)
701            except:
702		if not quiet:
703		    self.warning("Misformatted macro comment for %s" % (name))
704		    self.warning("  problem with '%s'" % (lines[0]))
705		del lines[0]
706		continue
707	    del lines[0]
708	    l = string.strip(lines[0])
709	    while len(l) > 2 and l[0:3] != '* @':
710	        while l[0] == '*':
711		    l = l[1:]
712		desc = desc + ' ' + string.strip(l)
713		del lines[0]
714		if len(lines) == 0:
715		    break
716		l = lines[0]
717            args.append((arg, desc))
718	while len(lines) > 0 and lines[0] == '*':
719	    del lines[0]
720	desc = ""
721	while len(lines) > 0:
722	    l = lines[0]
723	    while len(l) > 0 and l[0] == '*':
724	        l = l[1:]
725	    l = string.strip(l)
726	    desc = desc + " " + l
727	    del lines[0]
728
729	desc = string.strip(desc)
730
731	if quiet == 0:
732	    if desc == "":
733	        self.warning("Macro comment for %s lack description of the macro" % (name))
734
735	return((args, desc))
736
737     #
738     # Parse a comment block and merge the informations found in the
739     # parameters descriptions, finally returns a block as complete
740     # as possible
741     #
742    def mergeFunctionComment(self, name, description, quiet = 0):
743        if name == 'main':
744	    quiet = 1
745        if name[0:2] == '__':
746	    quiet = 1
747
748	(ret, args) = description
749	desc = ""
750	retdesc = ""
751
752        if self.comment == None:
753	    if not quiet:
754		self.warning("Missing comment for function %s" % (name))
755	    return(((ret[0], retdesc), args, desc))
756        if self.comment[0] != '*':
757	    if not quiet:
758		self.warning("Missing * in function comment for %s" % (name))
759	    return(((ret[0], retdesc), args, desc))
760	lines = string.split(self.comment, '\n')
761	if lines[0] == '*':
762	    del lines[0]
763	if lines[0] != "* %s:" % (name):
764	    if not quiet:
765		self.warning("Misformatted function comment for %s" % (name))
766		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
767	    return(((ret[0], retdesc), args, desc))
768	del lines[0]
769	while lines[0] == '*':
770	    del lines[0]
771	nbargs = len(args)
772	while len(lines) > 0 and lines[0][0:3] == '* @':
773	    l = lines[0][3:]
774	    try:
775	        (arg, desc) = string.split(l, ':', 1)
776		desc=string.strip(desc)
777		arg=string.strip(arg)
778            except:
779		if not quiet:
780		    self.warning("Misformatted function comment for %s" % (name))
781		    self.warning("  problem with '%s'" % (lines[0]))
782		del lines[0]
783		continue
784	    del lines[0]
785	    l = string.strip(lines[0])
786	    while len(l) > 2 and l[0:3] != '* @':
787	        while l[0] == '*':
788		    l = l[1:]
789		desc = desc + ' ' + string.strip(l)
790		del lines[0]
791		if len(lines) == 0:
792		    break
793		l = lines[0]
794	    i = 0
795	    while i < nbargs:
796	        if args[i][1] == arg:
797		    args[i] = (args[i][0], arg, desc)
798		    break;
799		i = i + 1
800	    if i >= nbargs:
801		if not quiet:
802		    self.warning("Unable to find arg %s from function comment for %s" % (
803		       arg, name))
804	while len(lines) > 0 and lines[0] == '*':
805	    del lines[0]
806	desc = ""
807	while len(lines) > 0:
808	    l = lines[0]
809	    while len(l) > 0 and l[0] == '*':
810	        l = l[1:]
811	    l = string.strip(l)
812	    if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
813	        try:
814		    l = string.split(l, ' ', 1)[1]
815		except:
816		    l = ""
817		retdesc = string.strip(l)
818		del lines[0]
819		while len(lines) > 0:
820		    l = lines[0]
821		    while len(l) > 0 and l[0] == '*':
822			l = l[1:]
823		    l = string.strip(l)
824		    retdesc = retdesc + " " + l
825		    del lines[0]
826	    else:
827	        desc = desc + " " + l
828		del lines[0]
829
830	retdesc = string.strip(retdesc)
831	desc = string.strip(desc)
832
833	if quiet == 0:
834	     #
835	     # report missing comments
836	     #
837	    i = 0
838	    while i < nbargs:
839	        if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
840		    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
841		i = i + 1
842	    if retdesc == "" and ret[0] != "void":
843		self.warning("Function comment for %s lacks description of return value" % (name))
844	    if desc == "":
845	        self.warning("Function comment for %s lacks description of the function" % (name))
846
847
848	return(((ret[0], retdesc), args, desc))
849
850    def parsePreproc(self, token):
851	if debug:
852	    print "=> preproc ", token, self.lexer.tokens
853        name = token[1]
854	if name == "#include":
855	    token = self.lexer.token()
856	    if token == None:
857	        return None
858	    if token[0] == 'preproc':
859		self.index_add(token[1], self.filename, not self.is_header,
860		                "include")
861		return self.lexer.token()
862	    return token
863	if name == "#define":
864	    token = self.lexer.token()
865	    if token == None:
866	        return None
867	    if token[0] == 'preproc':
868	         # TODO macros with arguments
869		name = token[1]
870	        lst = []
871		token = self.lexer.token()
872		while token != None and token[0] == 'preproc' and \
873		      token[1][0] != '#':
874		    lst.append(token[1])
875		    token = self.lexer.token()
876                try:
877		    name = string.split(name, '(') [0]
878                except:
879                    pass
880                info = self.parseMacroComment(name, not self.is_header)
881		self.index_add(name, self.filename, not self.is_header,
882		                "macro", info)
883		return token
884
885	#
886	# Processing of conditionals modified by Bill 1/1/05
887	#
888	# We process conditionals (i.e. tokens from #ifdef, #ifndef,
889	# #if, #else and #endif) for headers and mainline code,
890	# store the ones from the header in libxml2-api.xml, and later
891	# (in the routine merge_public) verify that the two (header and
892	# mainline code) agree.
893	#
894	# There is a small problem with processing the headers. Some of
895	# the variables are not concerned with enabling / disabling of
896	# library functions (e.g. '__XML_PARSER_H__'), and we don't want
897	# them to be included in libxml2-api.xml, or involved in
898	# the check between the header and the mainline code.  To
899	# accomplish this, we ignore any conditional which doesn't include
900	# the string 'ENABLED'
901	#
902	if name == "#ifdef":
903	    apstr = self.lexer.tokens[0][1]
904	    try:
905	        self.defines.append(apstr)
906		if string.find(apstr, 'ENABLED') != -1:
907		    self.conditionals.append("defined(%s)" % apstr)
908	    except:
909	        pass
910	elif name == "#ifndef":
911	    apstr = self.lexer.tokens[0][1]
912	    try:
913	        self.defines.append(apstr)
914		if string.find(apstr, 'ENABLED') != -1:
915		    self.conditionals.append("!defined(%s)" % apstr)
916	    except:
917	        pass
918	elif name == "#if":
919	    apstr = ""
920	    for tok in self.lexer.tokens:
921	        if apstr != "":
922		    apstr = apstr + " "
923	        apstr = apstr + tok[1]
924	    try:
925	        self.defines.append(apstr)
926		if string.find(apstr, 'ENABLED') != -1:
927		    self.conditionals.append(apstr)
928	    except:
929	        pass
930	elif name == "#else":
931	    if self.conditionals != [] and \
932	       string.find(self.defines[-1], 'ENABLED') != -1:
933	        self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
934	elif name == "#endif":
935	    if self.conditionals != [] and \
936	       string.find(self.defines[-1], 'ENABLED') != -1:
937	        self.conditionals = self.conditionals[:-1]
938	    self.defines = self.defines[:-1]
939	token = self.lexer.token()
940	while token != None and token[0] == 'preproc' and \
941	    token[1][0] != '#':
942	    token = self.lexer.token()
943	return token
944
945     #
946     # token acquisition on top of the lexer, it handle internally
947     # preprocessor and comments since they are logically not part of
948     # the program structure.
949     #
950    def token(self):
951        global ignored_words
952
953        token = self.lexer.token()
954	while token != None:
955	    if token[0] == 'comment':
956		token = self.parseComment(token)
957		continue
958	    elif token[0] == 'preproc':
959		token = self.parsePreproc(token)
960		continue
961	    elif token[0] == "name" and token[1] == "__const":
962	        token = ("name", "const")
963		return token
964	    elif token[0] == "name" and token[1] == "__attribute":
965		token = self.lexer.token()
966		while token != None and token[1] != ";":
967		    token = self.lexer.token()
968		return token
969	    elif token[0] == "name" and ignored_words.has_key(token[1]):
970	        (n, info) = ignored_words[token[1]]
971		i = 0
972		while i < n:
973		    token = self.lexer.token()
974		    i = i + 1
975		token = self.lexer.token()
976		continue
977	    else:
978	        if debug:
979		    print "=> ", token
980	        return token
981	return None
982
983     #
984     # Parse a typedef, it records the type and its name.
985     #
986    def parseTypedef(self, token):
987        if token == None:
988	    return None
989	token = self.parseType(token)
990	if token == None:
991	    self.error("parsing typedef")
992	    return None
993	base_type = self.type
994	type = base_type
995	 #self.debug("end typedef type", token)
996	while token != None:
997	    if token[0] == "name":
998		name = token[1]
999		signature = self.signature
1000		if signature != None:
1001		    type = string.split(type, '(')[0]
1002		    d = self.mergeFunctionComment(name,
1003			    ((type, None), signature), 1)
1004		    self.index_add(name, self.filename, not self.is_header,
1005				    "functype", d)
1006		else:
1007		    if base_type == "struct":
1008			self.index_add(name, self.filename, not self.is_header,
1009					"struct", type)
1010			base_type = "struct " + name
1011	            else:
1012			self.index_add(name, self.filename, not self.is_header,
1013		                    "typedef", type)
1014		token = self.token()
1015	    else:
1016		self.error("parsing typedef: expecting a name")
1017		return token
1018	     #self.debug("end typedef", token)
1019	    if token != None and token[0] == 'sep' and token[1] == ',':
1020	        type = base_type
1021	        token = self.token()
1022		while token != None and token[0] == "op":
1023		    type = type + token[1]
1024		    token = self.token()
1025	    elif token != None and token[0] == 'sep' and token[1] == ';':
1026	        break;
1027	    elif token != None and token[0] == 'name':
1028	        type = base_type
1029	        continue;
1030	    else:
1031		self.error("parsing typedef: expecting ';'", token)
1032		return token
1033	token = self.token()
1034	return token
1035
    #
    # Parse a C code block (used for function bodies): it parses up to
    # and including the balancing }
    #
1040    def parseBlock(self, token):
1041        while token != None:
1042	    if token[0] == "sep" and token[1] == "{":
1043	        token = self.token()
1044		token = self.parseBlock(token)
1045	    elif token[0] == "sep" and token[1] == "}":
1046	        self.comment = None
1047	        token = self.token()
1048		return token
1049	    else:
1050	        if self.collect_ref == 1:
1051		    oldtok = token
1052		    token = self.token()
1053		    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1054		        if token[0] == "sep" and token[1] == "(":
1055			    self.index_add_ref(oldtok[1], self.filename,
1056			                        0, "function")
1057			    token = self.token()
1058			elif token[0] == "name":
1059			    token = self.token()
1060			    if token[0] == "sep" and (token[1] == ";" or
1061			       token[1] == "," or token[1] == "="):
1062				self.index_add_ref(oldtok[1], self.filename,
1063						    0, "type")
1064		    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1065			self.index_add_ref(oldtok[1], self.filename,
1066					    0, "typedef")
1067		    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1068			self.index_add_ref(oldtok[1], self.filename,
1069					    0, "typedef")
1070
1071		else:
1072		    token = self.token()
1073	return token
1074
1075     #
1076     # Parse a C struct definition till the balancing }
1077     #
1078    def parseStruct(self, token):
1079        fields = []
1080	 #self.debug("start parseStruct", token)
1081        while token != None:
1082	    if token[0] == "sep" and token[1] == "{":
1083	        token = self.token()
1084		token = self.parseTypeBlock(token)
1085	    elif token[0] == "sep" and token[1] == "}":
1086		self.struct_fields = fields
1087		 #self.debug("end parseStruct", token)
1088		 #print fields
1089	        token = self.token()
1090		return token
1091	    else:
1092	        base_type = self.type
1093		 #self.debug("before parseType", token)
1094		token = self.parseType(token)
1095		 #self.debug("after parseType", token)
1096		if token != None and token[0] == "name":
1097		    fname = token[1]
1098		    token = self.token()
1099		    if token[0] == "sep" and token[1] == ";":
1100		        self.comment = None
1101		        token = self.token()
1102			fields.append((self.type, fname, self.comment))
1103			self.comment = None
1104		    else:
1105		        self.error("parseStruct: expecting ;", token)
1106		elif token != None and token[0] == "sep" and token[1] == "{":
1107		    token = self.token()
1108		    token = self.parseTypeBlock(token)
1109		    if token != None and token[0] == "name":
1110			token = self.token()
1111		    if token != None and token[0] == "sep" and token[1] == ";":
1112			token = self.token()
1113		    else:
1114		        self.error("parseStruct: expecting ;", token)
1115		else:
1116		    self.error("parseStruct: name", token)
1117		    token = self.token()
1118		self.type = base_type;
1119        self.struct_fields = fields
1120	 #self.debug("end parseStruct", token)
1121	 #print fields
1122	return token
1123
1124     #
1125     # Parse a C enum block, parse till the balancing }
1126     #
1127    def parseEnumBlock(self, token):
1128        self.enums = []
1129	name = None
1130	self.comment = None
1131	comment = ""
1132	value = "0"
1133        while token != None:
1134	    if token[0] == "sep" and token[1] == "{":
1135	        token = self.token()
1136		token = self.parseTypeBlock(token)
1137	    elif token[0] == "sep" and token[1] == "}":
1138		if name != None:
1139		    if self.comment != None:
1140			comment = self.comment
1141			self.comment = None
1142		    self.enums.append((name, value, comment))
1143	        token = self.token()
1144		return token
1145	    elif token[0] == "name":
1146		    if name != None:
1147			if self.comment != None:
1148			    comment = string.strip(self.comment)
1149			    self.comment = None
1150			self.enums.append((name, value, comment))
1151		    name = token[1]
1152		    comment = ""
1153		    token = self.token()
1154		    if token[0] == "op" and token[1][0] == "=":
1155		        value = ""
1156		        if len(token[1]) > 1:
1157			    value = token[1][1:]
1158		        token = self.token()
1159		        while token[0] != "sep" or (token[1] != ',' and
1160			      token[1] != '}'):
1161			    value = value + token[1]
1162			    token = self.token()
1163		    else:
1164		        try:
1165			    value = "%d" % (int(value) + 1)
1166			except:
1167			    self.warning("Failed to compute value of enum %s" % (name))
1168			    value=""
1169		    if token[0] == "sep" and token[1] == ",":
1170			token = self.token()
1171	    else:
1172	        token = self.token()
1173	return token
1174
    #
    # Parse a C definition block (used for structs): it parses up to
    # and including the balancing }
    #
1179    def parseTypeBlock(self, token):
1180        while token != None:
1181	    if token[0] == "sep" and token[1] == "{":
1182	        token = self.token()
1183		token = self.parseTypeBlock(token)
1184	    elif token[0] == "sep" and token[1] == "}":
1185	        token = self.token()
1186		return token
1187	    else:
1188	        token = self.token()
1189	return token
1190
1191     #
1192     # Parse a type: the fact that the type name can either occur after
1193     #    the definition or within the definition makes it a little harder
1194     #    if inside, the name token is pushed back before returning
1195     #
1196    def parseType(self, token):
1197        self.type = ""
1198	self.struct_fields = []
1199        self.signature = None
1200	if token == None:
1201	    return token
1202
1203	while token[0] == "name" and (
1204	      token[1] == "const" or token[1] == "unsigned"):
1205	    if self.type == "":
1206	        self.type = token[1]
1207	    else:
1208	        self.type = self.type + " " + token[1]
1209	    token = self.token()
1210
1211        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1212	    if self.type == "":
1213	        self.type = token[1]
1214	    else:
1215	        self.type = self.type + " " + token[1]
1216	    if token[0] == "name" and token[1] == "int":
1217		if self.type == "":
1218		    self.type = tmp[1]
1219		else:
1220		    self.type = self.type + " " + tmp[1]
1221
1222        elif token[0] == "name" and token[1] == "struct":
1223	    if self.type == "":
1224	        self.type = token[1]
1225	    else:
1226	        self.type = self.type + " " + token[1]
1227	    token = self.token()
1228	    nametok = None
1229	    if token[0] == "name":
1230	        nametok = token
1231		token = self.token()
1232	    if token != None and token[0] == "sep" and token[1] == "{":
1233		token = self.token()
1234		token = self.parseStruct(token)
1235	    elif token != None and token[0] == "op" and token[1] == "*":
1236	        self.type = self.type + " " + nametok[1] + " *"
1237		token = self.token()
1238		while token != None and token[0] == "op" and token[1] == "*":
1239		    self.type = self.type + " *"
1240		    token = self.token()
1241		if token[0] == "name":
1242		    nametok = token
1243		    token = self.token()
1244		else:
1245		    self.error("struct : expecting name", token)
1246		    return token
1247	    elif token != None and token[0] == "name" and nametok != None:
1248	        self.type = self.type + " " + nametok[1]
1249		return token
1250
1251	    if nametok != None:
1252		self.lexer.push(token)
1253		token = nametok
1254	    return token
1255
1256        elif token[0] == "name" and token[1] == "enum":
1257	    if self.type == "":
1258	        self.type = token[1]
1259	    else:
1260	        self.type = self.type + " " + token[1]
1261	    self.enums = []
1262	    token = self.token()
1263	    if token != None and token[0] == "sep" and token[1] == "{":
1264		token = self.token()
1265		token = self.parseEnumBlock(token)
1266	    else:
1267		self.error("parsing enum: expecting '{'", token)
1268	    enum_type = None
1269	    if token != None and token[0] != "name":
1270	        self.lexer.push(token)
1271	        token = ("name", "enum")
1272	    else:
1273	        enum_type = token[1]
1274	    for enum in self.enums:
1275		self.index_add(enum[0], self.filename,
1276			       not self.is_header, "enum",
1277			       (enum[1], enum[2], enum_type))
1278	    return token
1279
1280	elif token[0] == "name":
1281	    if self.type == "":
1282	        self.type = token[1]
1283	    else:
1284	        self.type = self.type + " " + token[1]
1285	else:
1286	    self.error("parsing type %s: expecting a name" % (self.type),
1287	               token)
1288	    return token
1289	token = self.token()
1290        while token != None and (token[0] == "op" or
1291	      token[0] == "name" and token[1] == "const"):
1292	    self.type = self.type + " " + token[1]
1293	    token = self.token()
1294
1295	 #
1296	 # if there is a parenthesis here, this means a function type
1297	 #
1298	if token != None and token[0] == "sep" and token[1] == '(':
1299	    self.type = self.type + token[1]
1300	    token = self.token()
1301	    while token != None and token[0] == "op" and token[1] == '*':
1302	        self.type = self.type + token[1]
1303		token = self.token()
1304	    if token == None or token[0] != "name" :
1305		self.error("parsing function type, name expected", token);
1306	        return token
1307	    self.type = self.type + token[1]
1308	    nametok = token
1309	    token = self.token()
1310	    if token != None and token[0] == "sep" and token[1] == ')':
1311		self.type = self.type + token[1]
1312		token = self.token()
1313		if token != None and token[0] == "sep" and token[1] == '(':
1314		    token = self.token()
1315		    type = self.type;
1316		    token = self.parseSignature(token);
1317		    self.type = type;
1318		else:
1319		    self.error("parsing function type, '(' expected", token);
1320		    return token
1321	    else:
1322	        self.error("parsing function type, ')' expected", token);
1323		return token
1324	    self.lexer.push(token)
1325	    token = nametok
1326	    return token
1327
1328         #
1329	 # do some lookahead for arrays
1330	 #
1331	if token != None and token[0] == "name":
1332	    nametok = token
1333	    token = self.token()
1334	    if token != None and token[0] == "sep" and token[1] == '[':
1335	        self.type = self.type + nametok[1]
1336		while token != None and token[0] == "sep" and token[1] == '[':
1337		    self.type = self.type + token[1]
1338		    token = self.token()
1339		    while token != None and token[0] != 'sep' and \
1340		          token[1] != ']' and token[1] != ';':
1341			self.type = self.type + token[1]
1342			token = self.token()
1343		if token != None and token[0] == 'sep' and token[1] == ']':
1344		    self.type = self.type + token[1]
1345		    token = self.token()
1346		else:
1347		    self.error("parsing array type, ']' expected", token);
1348		    return token
1349	    elif token != None and token[0] == "sep" and token[1] == ':':
1350	         # remove :12 in case it's a limited int size
1351		token = self.token()
1352		token = self.token()
1353	    self.lexer.push(token)
1354	    token = nametok
1355
1356	return token
1357
1358     #
1359     # Parse a signature: '(' has been parsed and we scan the type definition
1360     #    up to the ')' included
1361    def parseSignature(self, token):
1362        signature = []
1363	if token != None and token[0] == "sep" and token[1] == ')':
1364	    self.signature = []
1365	    token = self.token()
1366	    return token
1367	while token != None:
1368	    token = self.parseType(token)
1369	    if token != None and token[0] == "name":
1370	        signature.append((self.type, token[1], None))
1371		token = self.token()
1372	    elif token != None and token[0] == "sep" and token[1] == ',':
1373		token = self.token()
1374		continue
1375	    elif token != None and token[0] == "sep" and token[1] == ')':
1376	         # only the type was provided
1377		if self.type == "...":
1378		    signature.append((self.type, "...", None))
1379		else:
1380		    signature.append((self.type, None, None))
1381	    if token != None and token[0] == "sep":
1382	        if token[1] == ',':
1383		    token = self.token()
1384		    continue
1385		elif token[1] == ')':
1386		    token = self.token()
1387		    break
1388	self.signature = signature
1389	return token
1390
1391     #
1392     # Parse a global definition, be it a type, variable or function
1393     # the extern "C" blocks are a bit nasty and require it to recurse.
1394     #
1395    def parseGlobal(self, token):
1396        static = 0
1397        if token[1] == 'extern':
1398	    token = self.token()
1399	    if token == None:
1400	        return token
1401	    if token[0] == 'string':
1402	        if token[1] == 'C':
1403		    token = self.token()
1404		    if token == None:
1405			return token
1406		    if token[0] == 'sep' and token[1] == "{":
1407		        token = self.token()
1408#			 print 'Entering extern "C line ', self.lineno()
1409			while token != None and (token[0] != 'sep' or
1410			      token[1] != "}"):
1411			    if token[0] == 'name':
1412				token = self.parseGlobal(token)
1413			    else:
1414				self.error(
1415				 "token %s %s unexpected at the top level" % (
1416					token[0], token[1]))
1417				token = self.parseGlobal(token)
1418#			 print 'Exiting extern "C" line', self.lineno()
1419			token = self.token()
1420			return token
1421		else:
1422		    return token
1423	elif token[1] == 'static':
1424	    static = 1
1425	    token = self.token()
1426	    if token == None or  token[0] != 'name':
1427	        return token
1428
1429	if token[1] == 'typedef':
1430	    token = self.token()
1431	    return self.parseTypedef(token)
1432	else:
1433	    token = self.parseType(token)
1434	    type_orig = self.type
1435	if token == None or token[0] != "name":
1436	    return token
1437	type = type_orig
1438	self.name = token[1]
1439	token = self.token()
1440	while token != None and (token[0] == "sep" or token[0] == "op"):
1441	    if token[0] == "sep":
1442		if token[1] == "[":
1443		    type = type + token[1]
1444		    token = self.token()
1445		    while token != None and (token[0] != "sep" or \
1446		          token[1] != ";"):
1447			type = type + token[1]
1448			token = self.token()
1449
1450	    if token != None and token[0] == "op" and token[1] == "=":
1451		 #
1452		 # Skip the initialization of the variable
1453		 #
1454		token = self.token()
1455		if token[0] == 'sep' and token[1] == '{':
1456		    token = self.token()
1457		    token = self.parseBlock(token)
1458		else:
1459		    self.comment = None
1460		    while token != None and (token[0] != "sep" or \
1461			  (token[1] != ';' and token[1] != ',')):
1462			    token = self.token()
1463		self.comment = None
1464		if token == None or token[0] != "sep" or (token[1] != ';' and
1465		   token[1] != ','):
1466		    self.error("missing ';' or ',' after value")
1467
1468	    if token != None and token[0] == "sep":
1469		if token[1] == ";":
1470		    self.comment = None
1471		    token = self.token()
1472		    if type == "struct":
1473		        self.index_add(self.name, self.filename,
1474			     not self.is_header, "struct", self.struct_fields)
1475		    else:
1476			self.index_add(self.name, self.filename,
1477			     not self.is_header, "variable", type)
1478		    break
1479		elif token[1] == "(":
1480		    token = self.token()
1481		    token = self.parseSignature(token)
1482		    if token == None:
1483			return None
1484		    if token[0] == "sep" and token[1] == ";":
1485		        d = self.mergeFunctionComment(self.name,
1486				((type, None), self.signature), 1)
1487			self.index_add(self.name, self.filename, static,
1488			                "function", d)
1489			token = self.token()
1490		    elif token[0] == "sep" and token[1] == "{":
1491		        d = self.mergeFunctionComment(self.name,
1492				((type, None), self.signature), static)
1493			self.index_add(self.name, self.filename, static,
1494			                "function", d)
1495			token = self.token()
1496			token = self.parseBlock(token);
1497		elif token[1] == ',':
1498		    self.comment = None
1499		    self.index_add(self.name, self.filename, static,
1500		                    "variable", type)
1501		    type = type_orig
1502		    token = self.token()
1503		    while token != None and token[0] == "sep":
1504		        type = type + token[1]
1505			token = self.token()
1506		    if token != None and token[0] == "name":
1507		        self.name = token[1]
1508			token = self.token()
1509		else:
1510		    break
1511
1512	return token
1513
1514    def parse(self):
1515        self.warning("Parsing %s" % (self.filename))
1516        token = self.token()
1517	while token != None:
1518            if token[0] == 'name':
1519	        token = self.parseGlobal(token)
1520            else:
1521	        self.error("token %s %s unexpected at the top level" % (
1522		       token[0], token[1]))
1523		token = self.parseGlobal(token)
1524		return
1525	self.parseTopComment(self.top_comment)
1526        return self.index
1527
1528
1529class docBuilder:
1530    """A documentation builder"""
1531    def __init__(self, name, directories=['.'], excludes=[]):
1532        self.name = name
1533        self.directories = directories
1534	self.excludes = excludes + ignored_files.keys()
1535	self.modules = {}
1536	self.headers = {}
1537	self.idx = index()
1538        self.xref = {}
1539	self.index = {}
1540	if name == 'libxml2':
1541	    self.basename = 'libxml'
1542	else:
1543	    self.basename = name
1544
1545    def indexString(self, id, str):
1546	if str == None:
1547	    return
1548	str = string.replace(str, "'", ' ')
1549	str = string.replace(str, '"', ' ')
1550	str = string.replace(str, "/", ' ')
1551	str = string.replace(str, '*', ' ')
1552	str = string.replace(str, "[", ' ')
1553	str = string.replace(str, "]", ' ')
1554	str = string.replace(str, "(", ' ')
1555	str = string.replace(str, ")", ' ')
1556	str = string.replace(str, "<", ' ')
1557	str = string.replace(str, '>', ' ')
1558	str = string.replace(str, "&", ' ')
1559	str = string.replace(str, '#', ' ')
1560	str = string.replace(str, ",", ' ')
1561	str = string.replace(str, '.', ' ')
1562	str = string.replace(str, ';', ' ')
1563	tokens = string.split(str)
1564	for token in tokens:
1565	    try:
1566		c = token[0]
1567		if string.find(string.letters, c) < 0:
1568		    pass
1569		elif len(token) < 3:
1570		    pass
1571		else:
1572		    lower = string.lower(token)
1573		    # TODO: generalize this a bit
1574		    if lower == 'and' or lower == 'the':
1575			pass
1576		    elif self.xref.has_key(token):
1577			self.xref[token].append(id)
1578		    else:
1579			self.xref[token] = [id]
1580	    except:
1581		pass
1582
1583    def analyze(self):
1584        print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1585	self.idx.analyze()
1586
1587    def scanHeaders(self):
1588	for header in self.headers.keys():
1589	    parser = CParser(header)
1590	    idx = parser.parse()
1591	    self.headers[header] = idx;
1592	    self.idx.merge(idx)
1593
1594    def scanModules(self):
1595	for module in self.modules.keys():
1596	    parser = CParser(module)
1597	    idx = parser.parse()
1598	    # idx.analyze()
1599	    self.modules[module] = idx
1600	    self.idx.merge_public(idx)
1601
1602    def scan(self):
1603        for directory in self.directories:
1604	    files = glob.glob(directory + "/*.c")
1605	    for file in files:
1606	        skip = 0
1607		for excl in self.excludes:
1608		    if string.find(file, excl) != -1:
1609		        skip = 1;
1610			break
1611		if skip == 0:
1612		    self.modules[file] = None;
1613	    files = glob.glob(directory + "/*.h")
1614	    for file in files:
1615	        skip = 0
1616		for excl in self.excludes:
1617		    if string.find(file, excl) != -1:
1618		        skip = 1;
1619			break
1620		if skip == 0:
1621		    self.headers[file] = None;
1622	self.scanHeaders()
1623	self.scanModules()
1624
1625    def modulename_file(self, file):
1626        module = os.path.basename(file)
1627	if module[-2:] == '.h':
1628	    module = module[:-2]
1629	elif module[-2:] == '.c':
1630	    module = module[:-2]
1631	return module
1632
1633    def serialize_enum(self, output, name):
1634        id = self.idx.enums[name]
1635        output.write("    <enum name='%s' file='%s'" % (name,
1636	             self.modulename_file(id.header)))
1637	if id.info != None:
1638	    info = id.info
1639	    if info[0] != None and info[0] != '':
1640	        try:
1641		    val = eval(info[0])
1642		except:
1643		    val = info[0]
1644		output.write(" value='%s'" % (val));
1645	    if info[2] != None and info[2] != '':
1646		output.write(" type='%s'" % info[2]);
1647	    if info[1] != None and info[1] != '':
1648		output.write(" info='%s'" % escape(info[1]));
1649        output.write("/>\n")
1650
1651    def serialize_macro(self, output, name):
1652        id = self.idx.macros[name]
1653        output.write("    <macro name='%s' file='%s'>\n" % (name,
1654	             self.modulename_file(id.header)))
1655	if id.info != None:
1656            try:
1657		(args, desc) = id.info
1658		if desc != None and desc != "":
1659		    output.write("      <info>%s</info>\n" % (escape(desc)))
1660		    self.indexString(name, desc)
1661		for arg in args:
1662		    (name, desc) = arg
1663		    if desc != None and desc != "":
1664			output.write("      <arg name='%s' info='%s'/>\n" % (
1665				     name, escape(desc)))
1666			self.indexString(name, desc)
1667		    else:
1668			output.write("      <arg name='%s'/>\n" % (name))
1669            except:
1670                pass
1671        output.write("    </macro>\n")
1672
1673    def serialize_typedef(self, output, name):
1674        id = self.idx.typedefs[name]
1675	if id.info[0:7] == 'struct ':
1676	    output.write("    <struct name='%s' file='%s' type='%s'" % (
1677	             name, self.modulename_file(id.header), id.info))
1678	    name = id.info[7:]
1679	    if self.idx.structs.has_key(name) and ( \
1680	       type(self.idx.structs[name].info) == type(()) or
1681		type(self.idx.structs[name].info) == type([])):
1682	        output.write(">\n");
1683		try:
1684		    for field in self.idx.structs[name].info:
1685			desc = field[2]
1686			self.indexString(name, desc)
1687			if desc == None:
1688			    desc = ''
1689			else:
1690			    desc = escape(desc)
1691			output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1692		except:
1693		    print "Failed to serialize struct %s" % (name)
1694		output.write("    </struct>\n")
1695	    else:
1696	        output.write("/>\n");
1697	else :
1698	    output.write("    <typedef name='%s' file='%s' type='%s'/>\n" % (
1699	             name, self.modulename_file(id.header), id.info))
1700
1701    def serialize_variable(self, output, name):
1702        id = self.idx.variables[name]
1703	if id.info != None:
1704	    output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
1705		    name, self.modulename_file(id.header), id.info))
1706	else:
1707	    output.write("    <variable name='%s' file='%s'/>\n" % (
1708	            name, self.modulename_file(id.header)))
1709
1710    def serialize_function(self, output, name):
1711        id = self.idx.functions[name]
1712	if name == debugsym:
1713	    print "=>", id
1714
1715        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
1716	             name, self.modulename_file(id.header),
1717		     self.modulename_file(id.module)))
1718	#
1719	# Processing of conditionals modified by Bill 1/1/05
1720	#
1721	if id.conditionals != None:
1722	    apstr = ""
1723	    for cond in id.conditionals:
1724	        if apstr != "":
1725		    apstr = apstr + " &amp;&amp; "
1726		apstr = apstr + cond
1727	    output.write("      <cond>%s</cond>\n"% (apstr));
1728	try:
1729	    (ret, params, desc) = id.info
1730	    output.write("      <info>%s</info>\n" % (escape(desc)))
1731	    self.indexString(name, desc)
1732	    if ret[0] != None:
1733	        if ret[0] == "void":
1734		    output.write("      <return type='void'/>\n")
1735		else:
1736		    output.write("      <return type='%s' info='%s'/>\n" % (
1737			     ret[0], escape(ret[1])))
1738		    self.indexString(name, ret[1])
1739	    for param in params:
1740	        if param[0] == 'void':
1741		    continue
1742	        if param[2] == None:
1743		    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1744		else:
1745		    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1746		    self.indexString(name, param[2])
1747	except:
1748	    print "Failed to save function %s info: " % name, `id.info`
1749        output.write("    </%s>\n" % (id.type))
1750
1751    def serialize_exports(self, output, file):
1752        module = self.modulename_file(file)
1753	output.write("    <file name='%s'>\n" % (module))
1754	dict = self.headers[file]
1755	if dict.info != None:
1756	    for data in ('Summary', 'Description', 'Author'):
1757		try:
1758		    output.write("     <%s>%s</%s>\n" % (
1759		                 string.lower(data),
1760				 escape(dict.info[data]),
1761				 string.lower(data)))
1762		except:
1763		    print "Header %s lacks a %s description" % (module, data)
1764	    if dict.info.has_key('Description'):
1765	        desc = dict.info['Description']
1766		if string.find(desc, "DEPRECATED") != -1:
1767		    output.write("     <deprecated/>\n")
1768
1769        ids = dict.macros.keys()
1770	ids.sort()
1771	for id in uniq(ids):
1772	    # Macros are sometime used to masquerade other types.
1773	    if dict.functions.has_key(id):
1774	        continue
1775	    if dict.variables.has_key(id):
1776	        continue
1777	    if dict.typedefs.has_key(id):
1778	        continue
1779	    if dict.structs.has_key(id):
1780	        continue
1781	    if dict.enums.has_key(id):
1782	        continue
1783	    output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
1784        ids = dict.enums.keys()
1785	ids.sort()
1786	for id in uniq(ids):
1787	    output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
1788        ids = dict.typedefs.keys()
1789	ids.sort()
1790	for id in uniq(ids):
1791	    output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
1792        ids = dict.structs.keys()
1793	ids.sort()
1794	for id in uniq(ids):
1795	    output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
1796        ids = dict.variables.keys()
1797	ids.sort()
1798	for id in uniq(ids):
1799	    output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
1800        ids = dict.functions.keys()
1801	ids.sort()
1802	for id in uniq(ids):
1803	    output.write("     <exports symbol='%s' type='function'/>\n" % (id))
1804	output.write("    </file>\n")
1805
1806    def serialize_xrefs_files(self, output):
1807        headers = self.headers.keys()
1808        headers.sort()
1809        for file in headers:
1810	    module = self.modulename_file(file)
1811	    output.write("    <file name='%s'>\n" % (module))
1812	    dict = self.headers[file]
1813	    ids = uniq(dict.functions.keys() + dict.variables.keys() + \
1814		  dict.macros.keys() + dict.typedefs.keys() + \
1815		  dict.structs.keys() + dict.enums.keys())
1816	    ids.sort()
1817	    for id in ids:
1818		output.write("      <ref name='%s'/>\n" % (id))
1819	    output.write("    </file>\n")
1820        pass
1821
1822    def serialize_xrefs_functions(self, output):
1823        funcs = {}
1824	for name in self.idx.functions.keys():
1825	    id = self.idx.functions[name]
1826	    try:
1827		(ret, params, desc) = id.info
1828		for param in params:
1829		    if param[0] == 'void':
1830			continue
1831		    if funcs.has_key(param[0]):
1832		        funcs[param[0]].append(name)
1833		    else:
1834		        funcs[param[0]] = [name]
1835	    except:
1836	        pass
1837	typ = funcs.keys()
1838	typ.sort()
1839	for type in typ:
1840	    if type == '' or type == 'void' or type == "int" or \
1841	       type == "char *" or type == "const char *" :
1842	        continue
1843	    output.write("    <type name='%s'>\n" % (type))
1844	    ids = funcs[type]
1845	    ids.sort()
1846	    pid = ''	# not sure why we have dups, but get rid of them!
1847	    for id in ids:
1848	        if id != pid:
1849	            output.write("      <ref name='%s'/>\n" % (id))
1850		    pid = id
1851	    output.write("    </type>\n")
1852
1853    def serialize_xrefs_constructors(self, output):
1854        funcs = {}
1855	for name in self.idx.functions.keys():
1856	    id = self.idx.functions[name]
1857	    try:
1858		(ret, params, desc) = id.info
1859		if ret[0] == "void":
1860		    continue
1861		if funcs.has_key(ret[0]):
1862		    funcs[ret[0]].append(name)
1863		else:
1864		    funcs[ret[0]] = [name]
1865	    except:
1866	        pass
1867	typ = funcs.keys()
1868	typ.sort()
1869	for type in typ:
1870	    if type == '' or type == 'void' or type == "int" or \
1871	       type == "char *" or type == "const char *" :
1872	        continue
1873	    output.write("    <type name='%s'>\n" % (type))
1874	    ids = funcs[type]
1875	    ids.sort()
1876	    for id in ids:
1877	        output.write("      <ref name='%s'/>\n" % (id))
1878	    output.write("    </type>\n")
1879
1880    def serialize_xrefs_alpha(self, output):
1881	letter = None
1882	ids = self.idx.identifiers.keys()
1883	ids.sort()
1884	for id in ids:
1885	    if id[0] != letter:
1886		if letter != None:
1887		    output.write("    </letter>\n")
1888		letter = id[0]
1889		output.write("    <letter name='%s'>\n" % (letter))
1890	    output.write("      <ref name='%s'/>\n" % (id))
1891	if letter != None:
1892	    output.write("    </letter>\n")
1893
1894    def serialize_xrefs_references(self, output):
1895        typ = self.idx.identifiers.keys()
1896	typ.sort()
1897	for id in typ:
1898	    idf = self.idx.identifiers[id]
1899	    module = idf.header
1900	    output.write("    <reference name='%s' href='%s'/>\n" % (id,
1901	                 'html/' + self.basename + '-' +
1902		         self.modulename_file(module) + '.html#' +
1903			 id))
1904
1905    def serialize_xrefs_index(self, output):
1906        index = self.xref
1907	typ = index.keys()
1908	typ.sort()
1909	letter = None
1910	count = 0
1911	chunk = 0
1912	chunks = []
1913	for id in typ:
1914	    if len(index[id]) > 30:
1915		continue
1916	    if id[0] != letter:
1917		if letter == None or count > 200:
1918		    if letter != None:
1919			output.write("      </letter>\n")
1920			output.write("    </chunk>\n")
1921			count = 0
1922			chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1923		    output.write("    <chunk name='chunk%s'>\n" % (chunk))
1924		    first_letter = id[0]
1925		    chunk = chunk + 1
1926		elif letter != None:
1927		    output.write("      </letter>\n")
1928		letter = id[0]
1929		output.write("      <letter name='%s'>\n" % (letter))
1930	    output.write("        <word name='%s'>\n" % (id))
1931	    tokens = index[id];
1932	    tokens.sort()
1933	    tok = None
1934	    for token in tokens:
1935		if tok == token:
1936		    continue
1937		tok = token
1938		output.write("          <ref name='%s'/>\n" % (token))
1939		count = count + 1
1940	    output.write("        </word>\n")
1941	if letter != None:
1942	    output.write("      </letter>\n")
1943	    output.write("    </chunk>\n")
1944	    if count != 0:
1945	        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1946	    output.write("    <chunks>\n")
1947	    for ch in chunks:
1948		output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
1949			     ch[0], ch[1], ch[2]))
1950	    output.write("    </chunks>\n")
1951
1952    def serialize_xrefs(self, output):
1953	output.write("  <references>\n")
1954	self.serialize_xrefs_references(output)
1955	output.write("  </references>\n")
1956	output.write("  <alpha>\n")
1957	self.serialize_xrefs_alpha(output)
1958	output.write("  </alpha>\n")
1959	output.write("  <constructors>\n")
1960	self.serialize_xrefs_constructors(output)
1961	output.write("  </constructors>\n")
1962	output.write("  <functions>\n")
1963	self.serialize_xrefs_functions(output)
1964	output.write("  </functions>\n")
1965	output.write("  <files>\n")
1966	self.serialize_xrefs_files(output)
1967	output.write("  </files>\n")
1968	output.write("  <index>\n")
1969	self.serialize_xrefs_index(output)
1970	output.write("  </index>\n")
1971
1972    def serialize(self):
1973        filename = "%s-api.xml" % self.name
1974        print "Saving XML description %s" % (filename)
1975        output = open(filename, "w")
1976        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1977        output.write("<api name='%s'>\n" % self.name)
1978        output.write("  <files>\n")
1979        headers = self.headers.keys()
1980        headers.sort()
1981        for file in headers:
1982            self.serialize_exports(output, file)
1983        output.write("  </files>\n")
1984        output.write("  <symbols>\n")
1985        macros = self.idx.macros.keys()
1986        macros.sort()
1987        for macro in macros:
1988            self.serialize_macro(output, macro)
1989        enums = self.idx.enums.keys()
1990        enums.sort()
1991        for enum in enums:
1992            self.serialize_enum(output, enum)
1993        typedefs = self.idx.typedefs.keys()
1994        typedefs.sort()
1995        for typedef in typedefs:
1996            self.serialize_typedef(output, typedef)
1997        variables = self.idx.variables.keys()
1998        variables.sort()
1999        for variable in variables:
2000            self.serialize_variable(output, variable)
2001        functions = self.idx.functions.keys()
2002        functions.sort()
2003        for function in functions:
2004            self.serialize_function(output, function)
2005        output.write("  </symbols>\n")
2006        output.write("</api>\n")
2007        output.close()
2008
2009        filename = "%s-refs.xml" % self.name
2010        print "Saving XML Cross References %s" % (filename)
2011        output = open(filename, "w")
2012        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2013        output.write("<apirefs name='%s'>\n" % self.name)
2014        self.serialize_xrefs(output)
2015        output.write("</apirefs>\n")
2016        output.close()
2017
2018
def rebuild():
    """Rebuild the API description of the library found around the
    current directory.

    The module is guessed from the files present: parser.c or
    ../parser.c selects libxml2, ../libxslt/transform.c selects libxslt.
    The matching docBuilder is then scanned, analyzed and serialized.
    When ../libexslt/exslt.c exists a libexslt description is built as
    well.

    Returns the main docBuilder, or None if the module cannot be guessed.
    """
    if glob.glob("parser.c"):
        print("Rebuilding API description for libxml2")
        # NOTE(review): the same directory is listed twice here; kept as
        # found, confirm whether this is intended.
        builder = docBuilder("libxml2", [".", "."],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c"):
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2045
2046#
2047# for debugging the parser
2048#
def parse(filename):
    """Run a CParser over filename and return the result of its parse()."""
    return CParser(filename).parse()
2053
if __name__ == "__main__":
    # With an argument, only parse that file with debug output enabled;
    # without one, rebuild the whole API description.
    if len(sys.argv) > 1:
        debug = 1
        parse(sys.argv[1])
    else:
        rebuild()
2060