apibuild.py revision 99b78502b649a03a00b0ec83288a2e7216da7a17
1#!/usr/bin/python -u
2#
3# This is the API builder: it parses the C sources and builds the
4# formal API description in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
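#
# A minimal usage sketch (illustrative; the driver code is not shown here):
# scan a source tree and print a summary of the API found in the headers
# and C modules:
#
#     builder = docBuilder("libxml2", ["."], [])
#     builder.scan()
#     builder.analyze()
#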
10import os, sys
11import string
12import glob
13
14debug=0
15#debugsym='ignorableWhitespaceSAXFunc'
16debugsym=None
17
18#
19# C parser analysis code
20#
21ignored_files = {
22  "trio": "too many non standard macros",
23  "trio.c": "too many non standard macros",
24  "trionan.c": "too many non standard macros",
25  "triostr.c": "too many non standard macros",
26  "acconfig.h": "generated portability layer",
27  "config.h": "generated portability layer",
28  "libxml.h": "internal only",
29  "testOOM.c": "out of memory tester",
30  "testOOMlib.h": "out of memory tester",
31  "testOOMlib.c": "out of memory tester",
32  "rngparser.c": "not yet integrated",
33  "rngparser.h": "not yet integrated",
34  "elfgcchack.h": "not a normal header",
35  "testHTML.c": "test tool",
36  "testReader.c": "test tool",
37  "testSchemas.c": "test tool",
38  "testXPath.c": "test tool",
39  "testAutomata.c": "test tool",
40  "testModule.c": "test tool",
41  "testRegexp.c": "test tool",
42  "testThreads.c": "test tool",
43  "testC14N.c": "test tool",
44  "testRelax.c": "test tool",
45  "testThreadsWin32.c": "test tool",
46  "testSAX.c": "test tool",
47  "testURI.c": "test tool",
48  "testapi.c": "generated regression tests",
49  "runtest.c": "regression tests program",
50  "runsuite.c": "regression tests program",
51  "tst.c": "not part of the library",
52  "testdso.c": "test for dynamic shared libraries",
53}
54
55ignored_words = {
56  "WINAPI": (0, "Windows keyword"),
57  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
58  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
59  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
60  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
61  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
62  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
63  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
64  "XMLCALL": (0, "Special macro for win32 calls"),
65  "XSLTCALL": (0, "Special macro for win32 calls"),
66  "XMLCDECL": (0, "Special macro for win32 calls"),
67  "EXSLTCALL": (0, "Special macro for win32 calls"),
68  "__declspec": (3, "Windows keyword"),
69  "__stdcall": (0, "Windows keyword"),
70  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
71  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
72  "X_IN_Y": (5, "macro function builder"),
73}
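#
# Each entry maps an ignored word to a tuple (number of following lexer
# tokens to skip, reason).  For instance, "__declspec": (3, ...) makes the
# parser drop the keyword and the next three tokens, so a declaration
# fragment such as '__declspec ( dllexport )' is skipped entirely.
#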
74
75def escape(raw):
76    raw = string.replace(raw, '&', '&amp;')
77    raw = string.replace(raw, '<', '&lt;')
78    raw = string.replace(raw, '>', '&gt;')
79    raw = string.replace(raw, "'", '&apos;')
80    raw = string.replace(raw, '"', '&quot;')
81    return raw
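# Example (illustrative): escape('a < b & "c"') returns
# 'a &lt; b &amp; &quot;c&quot;', ready for inclusion in the XML output.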
82
83def uniq(items):
84    d = {}
85    for item in items:
86        d[item]=1
87    return d.keys()
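# Example (illustrative): uniq(['a', 'b', 'a', 'c']) returns the unique
# items, e.g. ['a', 'c', 'b'] -- the order of dictionary keys is not
# guaranteed.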
88
89class identifier:
90    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
91                 info=None, extra=None, conditionals = None):
92        self.name = name
93	self.header = header
94	self.module = module
95	self.type = type
96	self.info = info
97	self.extra = extra
98	self.lineno = lineno
99	self.static = 0
100	if conditionals == None or len(conditionals) == 0:
101	    self.conditionals = None
102	else:
103	    self.conditionals = conditionals[:]
104	if self.name == debugsym:
105	    print "=> define %s : %s" % (debugsym, (module, type, info,
106	                                 extra, conditionals))
107
108    def __repr__(self):
109        r = "%s %s:" % (self.type, self.name)
110	if self.static:
111	    r = r + " static"
112	if self.module != None:
113	    r = r + " from %s" % (self.module)
114	if self.info != None:
115	    r = r + " " +  `self.info`
116	if self.extra != None:
117	    r = r + " " + `self.extra`
118	if self.conditionals != None:
119	    r = r + " " + `self.conditionals`
120	return r
121
122
123    def set_header(self, header):
124        self.header = header
125    def set_module(self, module):
126        self.module = module
127    def set_type(self, type):
128        self.type = type
129    def set_info(self, info):
130        self.info = info
131    def set_extra(self, extra):
132        self.extra = extra
133    def set_lineno(self, lineno):
134        self.lineno = lineno
135    def set_static(self, static):
136        self.static = static
137    def set_conditionals(self, conditionals):
138	if conditionals == None or len(conditionals) == 0:
139	    self.conditionals = None
140	else:
141	    self.conditionals = conditionals[:]
142
143    def get_name(self):
144        return self.name
145    def get_header(self):
146        return self.header
147    def get_module(self):
148        return self.module
149    def get_type(self):
150        return self.type
151    def get_info(self):
152        return self.info
153    def get_lineno(self):
154        return self.lineno
155    def get_extra(self):
156        return self.extra
157    def get_static(self):
158        return self.static
159    def get_conditionals(self):
160        return self.conditionals
161
162    def update(self, header, module, type = None, info = None, extra=None,
163               conditionals=None):
164	if self.name == debugsym:
165	    print "=> update %s : %s" % (debugsym, (module, type, info,
166	                                 extra, conditionals))
167        if header != None and self.header == None:
168	    self.set_header(header)
169        if module != None and (self.module == None or self.header == self.module):
170	    self.set_module(module)
171        if type != None and self.type == None:
172	    self.set_type(type)
173        if info != None:
174	    self.set_info(info)
175        if extra != None:
176	    self.set_extra(extra)
177        if conditionals != None:
178	    self.set_conditionals(conditionals)
179
180class index:
181    def __init__(self, name = "noname"):
182        self.name = name
183        self.identifiers = {}
184        self.functions = {}
185	self.variables = {}
186	self.includes = {}
187	self.structs = {}
188	self.enums = {}
189	self.typedefs = {}
190	self.macros = {}
191	self.references = {}
192	self.info = {}
193
194    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
195        if name[0:2] == '__':
196	    return None
197        d = None
198        try:
199	   d = self.identifiers[name]
200	   d.update(header, module, type, info, extra, conditionals)
201	except:
202	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
203	   self.identifiers[name] = d
204
205	if d != None and static == 1:
206	    d.set_static(1)
207
208	if d != None and name != None and type != None:
209	    self.references[name] = d
210
211	if name == debugsym:
212	    print "New ref: %s" % (d)
213
214	return d
215
216    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
217        if name[0:2] == '__':
218	    return None
219        d = None
220        try:
221	   d = self.identifiers[name]
222	   d.update(header, module, type, info, extra, conditionals)
223	except:
224	   d = identifier(name, header, module, type, lineno, info, extra, conditionals)
225	   self.identifiers[name] = d
226
227	if d != None and static == 1:
228	    d.set_static(1)
229
230	if d != None and name != None and type != None:
231	    if type == "function":
232	        self.functions[name] = d
233	    elif type == "functype":
234	        self.functions[name] = d
235	    elif type == "variable":
236	        self.variables[name] = d
237	    elif type == "include":
238	        self.includes[name] = d
239	    elif type == "struct":
240	        self.structs[name] = d
241	    elif type == "enum":
242	        self.enums[name] = d
243	    elif type == "typedef":
244	        self.typedefs[name] = d
245	    elif type == "macro":
246	        self.macros[name] = d
247	    else:
248	        print "Unable to register type ", type
249
250	if name == debugsym:
251	    print "New symbol: %s" % (d)
252
253	return d
254
255    def merge(self, idx):
256        for id in idx.functions.keys():
257              #
258              # macro might be used to override functions or variables
259              # definitions
260              #
261	     if self.macros.has_key(id):
262	         del self.macros[id]
263	     if self.functions.has_key(id):
264	         print "function %s from %s redeclared in %s" % (
265		    id, self.functions[id].header, idx.functions[id].header)
266	     else:
267	         self.functions[id] = idx.functions[id]
268		 self.identifiers[id] = idx.functions[id]
269        for id in idx.variables.keys():
270              #
271              # macro might be used to override functions or variables
272              # definitions
273              #
274	     if self.macros.has_key(id):
275	         del self.macros[id]
276	     if self.variables.has_key(id):
277	         print "variable %s from %s redeclared in %s" % (
278		    id, self.variables[id].header, idx.variables[id].header)
279	     else:
280	         self.variables[id] = idx.variables[id]
281		 self.identifiers[id] = idx.variables[id]
282        for id in idx.structs.keys():
283	     if self.structs.has_key(id):
284	         print "struct %s from %s redeclared in %s" % (
285		    id, self.structs[id].header, idx.structs[id].header)
286	     else:
287	         self.structs[id] = idx.structs[id]
288		 self.identifiers[id] = idx.structs[id]
289        for id in idx.typedefs.keys():
290	     if self.typedefs.has_key(id):
291	         print "typedef %s from %s redeclared in %s" % (
292		    id, self.typedefs[id].header, idx.typedefs[id].header)
293	     else:
294	         self.typedefs[id] = idx.typedefs[id]
295		 self.identifiers[id] = idx.typedefs[id]
296        for id in idx.macros.keys():
297              #
298              # macro might be used to override functions or variables
299              # definitions
300              #
301             if self.variables.has_key(id):
302                 continue
303             if self.functions.has_key(id):
304                 continue
305             if self.enums.has_key(id):
306                 continue
307	     if self.macros.has_key(id):
308	         print "macro %s from %s redeclared in %s" % (
309		    id, self.macros[id].header, idx.macros[id].header)
310	     else:
311	         self.macros[id] = idx.macros[id]
312		 self.identifiers[id] = idx.macros[id]
313        for id in idx.enums.keys():
314	     if self.enums.has_key(id):
315	         print "enum %s from %s redeclared in %s" % (
316		    id, self.enums[id].header, idx.enums[id].header)
317	     else:
318	         self.enums[id] = idx.enums[id]
319		 self.identifiers[id] = idx.enums[id]
320
321    def merge_public(self, idx):
322        for id in idx.functions.keys():
323	     if self.functions.has_key(id):
324	         # check that function condition agrees with header
325	         if idx.functions[id].conditionals != \
326		    self.functions[id].conditionals:
327		     print "Header condition differs from Function for %s:" \
328		        % id
329		     print "  H: %s" % self.functions[id].conditionals
330		     print "  C: %s" % idx.functions[id].conditionals
331	         up = idx.functions[id]
332	         self.functions[id].update(None, up.module, up.type, up.info, up.extra)
333	 #     else:
334	 #         print "Function %s from %s is not declared in headers" % (
335	 #	        id, idx.functions[id].module)
336	 # TODO: do the same for variables.
337
338    def analyze_dict(self, type, dict):
339        count = 0
340	public = 0
341        for name in dict.keys():
342	    id = dict[name]
343	    count = count + 1
344	    if id.static == 0:
345	        public = public + 1
346        if count != public:
347	    print "  %d %s , %d public" % (count, type, public)
348	elif count != 0:
349	    print "  %d public %s" % (count, type)
350
351
352    def analyze(self):
353	self.analyze_dict("functions", self.functions)
354	self.analyze_dict("variables", self.variables)
355	self.analyze_dict("structs", self.structs)
356	self.analyze_dict("typedefs", self.typedefs)
357	self.analyze_dict("macros", self.macros)
358
359class CLexer:
360    """A lexer for the C language; it tokenizes the input by reading
361       and analyzing it line by line."""
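    #
    # Tokens are (kind, value) tuples where kind is one of 'name', 'sep',
    # 'op', 'string', 'comment' or 'preproc'.  For example (illustrative),
    # the line 'int xmlStrlen(const xmlChar *str);' is tokenized as:
    #   ('name', 'int'), ('name', 'xmlStrlen'), ('sep', '('),
    #   ('name', 'const'), ('name', 'xmlChar'), ('op', '*'),
    #   ('name', 'str'), ('sep', ')'), ('sep', ';')
    #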
362    def __init__(self, input):
363        self.input = input
364	self.tokens = []
365	self.line = ""
366	self.lineno = 0
367
368    def getline(self):
369        line = ''
370	while line == '':
371	    line = self.input.readline()
372	    if not line:
373		return None
374	    self.lineno = self.lineno + 1
375	    line = string.lstrip(line)
376	    line = string.rstrip(line)
377	    if line == '':
378	        continue
379	    while line[-1] == '\\':
380	        line = line[:-1]
381		n = self.input.readline()
382		self.lineno = self.lineno + 1
383		n = string.lstrip(n)
384		n = string.rstrip(n)
385		if not n:
386		    break
387		else:
388		    line = line + n
389        return line
390
391    def getlineno(self):
392        return self.lineno
393
394    def push(self, token):
395        self.tokens.insert(0, token);
396
397    def debug(self):
398        print "Last token: ", self.last
399	print "Token queue: ", self.tokens
400	print "Line %d end: " % (self.lineno), self.line
401
402    def token(self):
403        while self.tokens == []:
404	    if self.line == "":
405		line = self.getline()
406	    else:
407	        line = self.line
408		self.line = ""
409	    if line == None:
410	        return None
411
412	    if line[0] == '#':
413	        self.tokens = map((lambda x: ('preproc', x)),
414		                  string.split(line))
415		break;
416	    l = len(line)
417	    if line[0] == '"' or line[0] == "'":
418	        end = line[0]
419	        line = line[1:]
420		found = 0
421		tok = ""
422		while found == 0:
423		    i = 0
424		    l = len(line)
425		    while i < l:
426			if line[i] == end:
427			    self.line = line[i+1:]
428			    line = line[:i]
429			    l = i
430			    found = 1
431			    break
432			if line[i] == '\\':
433			    i = i + 1
434			i = i + 1
435		    tok = tok + line
436		    if found == 0:
437		        line = self.getline()
438			if line == None:
439			    return None
440		self.last = ('string', tok)
441		return self.last
442
443	    if l >= 2 and line[0] == '/' and line[1] == '*':
444	        line = line[2:]
445		found = 0
446		tok = ""
447		while found == 0:
448		    i = 0
449		    l = len(line)
450		    while i < l:
451			if line[i] == '*' and i+1 < l and line[i+1] == '/':
452			    self.line = line[i+2:]
453			    line = line[:i-1]
454			    l = i
455			    found = 1
456			    break
457			i = i + 1
458	            if tok != "":
459		        tok = tok + "\n"
460		    tok = tok + line
461		    if found == 0:
462		        line = self.getline()
463			if line == None:
464			    return None
465		self.last = ('comment', tok)
466		return self.last
467	    if l >= 2 and line[0] == '/' and line[1] == '/':
468	        line = line[2:]
469		self.last = ('comment', line)
470		return self.last
471	    i = 0
472	    while i < l:
473	        if line[i] == '/' and i+1 < l and line[i+1] == '/':
474		    self.line = line[i:]
475		    line = line[:i]
476		    break
477	        if line[i] == '/' and i+1 < l and line[i+1] == '*':
478		    self.line = line[i:]
479		    line = line[:i]
480		    break
481		if line[i] == '"' or line[i] == "'":
482		    self.line = line[i:]
483		    line = line[:i]
484		    break
485		i = i + 1
486	    l = len(line)
487	    i = 0
488	    while i < l:
489	        if line[i] == ' ' or line[i] == '\t':
490		    i = i + 1
491		    continue
492		o = ord(line[i])
493		if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
494		   (o >= 48 and o <= 57):
495		    s = i
496		    while i < l:
497			o = ord(line[i])
498			if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
499			   (o >= 48 and o <= 57) or string.find(
500			       " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
501			    i = i + 1
502			else:
503			    break
504		    self.tokens.append(('name', line[s:i]))
505		    continue
506		if string.find("(){}:;,[]", line[i]) != -1:
507#                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
508#		    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
509#		    line[i] == ',' or line[i] == '[' or line[i] == ']':
510		    self.tokens.append(('sep', line[i]))
511		    i = i + 1
512		    continue
513		if string.find("+-*><=/%&!|.", line[i]) != -1:
514#                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
515#		    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
516#		    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
517#		    line[i] == '!' or line[i] == '|' or line[i] == '.':
518		    if line[i] == '.' and  i + 2 < l and \
519		       line[i+1] == '.' and line[i+2] == '.':
520			self.tokens.append(('name', '...'))
521			i = i + 3
522			continue
523
524		    j = i + 1
525		    if j < l and (
526		       string.find("+-*><=/%&!|", line[j]) != -1):
527#		        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
528#			line[j] == '>' or line[j] == '<' or line[j] == '=' or \
529#			line[j] == '/' or line[j] == '%' or line[j] == '&' or \
530#			line[j] == '!' or line[j] == '|'):
531			self.tokens.append(('op', line[i:j+1]))
532			i = j + 1
533		    else:
534			self.tokens.append(('op', line[i]))
535			i = i + 1
536		    continue
537		s = i
538		while i < l:
539		    o = ord(line[i])
540		    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
541		       (o >= 48 and o <= 57) or (
542		        string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
543#		         line[i] != ' ' and line[i] != '\t' and
544#			 line[i] != '(' and line[i] != ')' and
545#			 line[i] != '{'  and line[i] != '}' and
546#			 line[i] != ':' and line[i] != ';' and
547#			 line[i] != ',' and line[i] != '+' and
548#			 line[i] != '-' and line[i] != '*' and
549#			 line[i] != '/' and line[i] != '%' and
550#			 line[i] != '&' and line[i] != '!' and
551#			 line[i] != '|' and line[i] != '[' and
552#			 line[i] != ']' and line[i] != '=' and
553#			 line[i] != '*' and line[i] != '>' and
554#			 line[i] != '<'):
555			i = i + 1
556		    else:
557		        break
558		self.tokens.append(('name', line[s:i]))
559
560	tok = self.tokens[0]
561	self.tokens = self.tokens[1:]
562	self.last = tok
563	return tok
564
565class CParser:
566    """The C module parser"""
567    def __init__(self, filename, idx = None):
568        self.filename = filename
569	if len(filename) > 2 and filename[-2:] == '.h':
570	    self.is_header = 1
571	else:
572	    self.is_header = 0
573        self.input = open(filename)
574	self.lexer = CLexer(self.input)
575	if idx == None:
576	    self.index = index()
577	else:
578	    self.index = idx
579	self.top_comment = ""
580	self.last_comment = ""
581	self.comment = None
582	self.collect_ref = 0
583	self.no_error = 0
584	self.conditionals = []
585	self.defines = []
586
587    def collect_references(self):
588        self.collect_ref = 1
589
590    def stop_error(self):
591        self.no_error = 1
592
593    def start_error(self):
594        self.no_error = 0
595
596    def lineno(self):
597        return self.lexer.getlineno()
598
599    def index_add(self, name, module, static, type, info=None, extra = None):
600	if self.is_header == 1:
601	    self.index.add(name, module, module, static, type, self.lineno(),
602			   info, extra, self.conditionals)
603	else:
604	    self.index.add(name, None, module, static, type, self.lineno(),
605			   info, extra, self.conditionals)
606
607    def index_add_ref(self, name, module, static, type, info=None,
608                      extra = None):
609	if self.is_header == 1:
610	    self.index.add_ref(name, module, module, static, type,
611	                       self.lineno(), info, extra, self.conditionals)
612	else:
613	    self.index.add_ref(name, None, module, static, type, self.lineno(),
614			       info, extra, self.conditionals)
615
616    def warning(self, msg):
617        if self.no_error:
618	    return
619	print msg
620
621    def error(self, msg, token=-1):
622        if self.no_error:
623	    return
624
625        print "Parse Error: " + msg
626	if token != -1:
627	    print "Got token ", token
628	self.lexer.debug()
629	sys.exit(1)
630
631    def debug(self, msg, token=-1):
632        print "Debug: " + msg
633	if token != -1:
634	    print "Got token ", token
635	self.lexer.debug()
636
637    def parseTopComment(self, comment):
638	res = {}
639	lines = string.split(comment, "\n")
640	item = None
641	for line in lines:
642	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
643		line = line[1:]
644	    while line != "" and line[0] == '*':
645		line = line[1:]
646	    while line != "" and (line[0] == ' ' or line[0] == '\t'):
647		line = line[1:]
648	    try:
649		(it, line) = string.split(line, ":", 1)
650		item = it
651		while line != "" and (line[0] == ' ' or line[0] == '\t'):
652		    line = line[1:]
653		if res.has_key(item):
654		    res[item] = res[item] + " " + line
655		else:
656		    res[item] = line
657	    except:
658		if item != None:
659		    if res.has_key(item):
660			res[item] = res[item] + " " + line
661		    else:
662			res[item] = line
663	self.index.info = res
664
665    def parseComment(self, token):
666        if self.top_comment == "":
667	    self.top_comment = token[1]
668	if self.comment == None or token[1][0] == '*':
669	    self.comment = token[1];
670	else:
671	    self.comment = self.comment + token[1]
672	token = self.lexer.token()
673
674        if string.find(self.comment, "DOC_DISABLE") != -1:
675	    self.stop_error()
676
677        if string.find(self.comment, "DOC_ENABLE") != -1:
678	    self.start_error()
679
680	return token
681
682    #
683    # Parse a comment block associated with a typedef
684    #
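    # The comment is expected to follow the documentation convention used
    # in the headers, e.g. (illustrative, xmlFooType is a made-up name):
    #   /**
    #    * xmlFooType:
    #    *
    #    * Short description of the type.
    #    */
    # On success the description string is returned.
    #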
685    def parseTypeComment(self, name, quiet = 0):
686        if name[0:2] == '__':
687	    quiet = 1
688
689        args = []
690	desc = ""
691
692        if self.comment == None:
693	    if not quiet:
694		self.warning("Missing comment for type %s" % (name))
695	    return((args, desc))
696        if self.comment[0] != '*':
697	    if not quiet:
698		self.warning("Missing * in type comment for %s" % (name))
699	    return((args, desc))
700	lines = string.split(self.comment, '\n')
701	if lines[0] == '*':
702	    del lines[0]
703	if lines[0] != "* %s:" % (name):
704	    if not quiet:
705		self.warning("Misformatted type comment for %s" % (name))
706		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
707	    return((args, desc))
708	del lines[0]
709	while len(lines) > 0 and lines[0] == '*':
710	    del lines[0]
711	desc = ""
712	while len(lines) > 0:
713	    l = lines[0]
714	    while len(l) > 0 and l[0] == '*':
715	        l = l[1:]
716	    l = string.strip(l)
717	    desc = desc + " " + l
718	    del lines[0]
719
720	desc = string.strip(desc)
721
722	if quiet == 0:
723	    if desc == "":
724	        self.warning("Type comment for %s lacks a description of the type" % (name))
725
726	return(desc)
727    #
728    # Parse a comment block associated with a macro
729    #
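    # Expected layout, e.g. (illustrative, XML_FOO is a made-up name):
    #   /**
    #    * XML_FOO:
    #    * @node:  the node being processed
    #    *
    #    * Short description of the macro.
    #    */
    # parseMacroComment() returns (args, desc) where args is a list of
    # (name, description) pairs, here [('node', 'the node being processed')].
    #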
730    def parseMacroComment(self, name, quiet = 0):
731        if name[0:2] == '__':
732	    quiet = 1
733
734        args = []
735	desc = ""
736
737        if self.comment == None:
738	    if not quiet:
739		self.warning("Missing comment for macro %s" % (name))
740	    return((args, desc))
741        if self.comment[0] != '*':
742	    if not quiet:
743		self.warning("Missing * in macro comment for %s" % (name))
744	    return((args, desc))
745	lines = string.split(self.comment, '\n')
746	if lines[0] == '*':
747	    del lines[0]
748	if lines[0] != "* %s:" % (name):
749	    if not quiet:
750		self.warning("Misformatted macro comment for %s" % (name))
751		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
752	    return((args, desc))
753	del lines[0]
754	while lines[0] == '*':
755	    del lines[0]
756	while len(lines) > 0 and lines[0][0:3] == '* @':
757	    l = lines[0][3:]
758	    try:
759	        (arg, desc) = string.split(l, ':', 1)
760		desc=string.strip(desc)
761		arg=string.strip(arg)
762            except:
763		if not quiet:
764		    self.warning("Misformatted macro comment for %s" % (name))
765		    self.warning("  problem with '%s'" % (lines[0]))
766		del lines[0]
767		continue
768	    del lines[0]
769	    l = string.strip(lines[0])
770	    while len(l) > 2 and l[0:3] != '* @':
771	        while l[0] == '*':
772		    l = l[1:]
773		desc = desc + ' ' + string.strip(l)
774		del lines[0]
775		if len(lines) == 0:
776		    break
777		l = lines[0]
778            args.append((arg, desc))
779	while len(lines) > 0 and lines[0] == '*':
780	    del lines[0]
781	desc = ""
782	while len(lines) > 0:
783	    l = lines[0]
784	    while len(l) > 0 and l[0] == '*':
785	        l = l[1:]
786	    l = string.strip(l)
787	    desc = desc + " " + l
788	    del lines[0]
789
790	desc = string.strip(desc)
791
792	if quiet == 0:
793	    if desc == "":
794	        self.warning("Macro comment for %s lacks a description of the macro" % (name))
795
796	return((args, desc))
797
798     #
799     # Parse a comment block and merge the information found in the
800     # parameter descriptions, finally returning a block as complete
801     # as possible
802     #
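     # The comment is expected to look like (illustrative, xmlFoo and its
     # arguments are made-up names):
     #   /**
     #    * xmlFoo:
     #    * @doc:  the document
     #    * @name:  the element name
     #    *
     #    * Short description of the function.
     #    *
     #    * Returns 0 in case of success, -1 in case of error.
     #    */
     # The @argument descriptions are merged into the signature and the text
     # after "Returns" becomes the return value description.
     #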
803    def mergeFunctionComment(self, name, description, quiet = 0):
804        if name == 'main':
805	    quiet = 1
806        if name[0:2] == '__':
807	    quiet = 1
808
809	(ret, args) = description
810	desc = ""
811	retdesc = ""
812
813        if self.comment == None:
814	    if not quiet:
815		self.warning("Missing comment for function %s" % (name))
816	    return(((ret[0], retdesc), args, desc))
817        if self.comment[0] != '*':
818	    if not quiet:
819		self.warning("Missing * in function comment for %s" % (name))
820	    return(((ret[0], retdesc), args, desc))
821	lines = string.split(self.comment, '\n')
822	if lines[0] == '*':
823	    del lines[0]
824	if lines[0] != "* %s:" % (name):
825	    if not quiet:
826		self.warning("Misformatted function comment for %s" % (name))
827		self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
828	    return(((ret[0], retdesc), args, desc))
829	del lines[0]
830	while lines[0] == '*':
831	    del lines[0]
832	nbargs = len(args)
833	while len(lines) > 0 and lines[0][0:3] == '* @':
834	    l = lines[0][3:]
835	    try:
836	        (arg, desc) = string.split(l, ':', 1)
837		desc=string.strip(desc)
838		arg=string.strip(arg)
839            except:
840		if not quiet:
841		    self.warning("Misformatted function comment for %s" % (name))
842		    self.warning("  problem with '%s'" % (lines[0]))
843		del lines[0]
844		continue
845	    del lines[0]
846	    l = string.strip(lines[0])
847	    while len(l) > 2 and l[0:3] != '* @':
848	        while l[0] == '*':
849		    l = l[1:]
850		desc = desc + ' ' + string.strip(l)
851		del lines[0]
852		if len(lines) == 0:
853		    break
854		l = lines[0]
855	    i = 0
856	    while i < nbargs:
857	        if args[i][1] == arg:
858		    args[i] = (args[i][0], arg, desc)
859		    break;
860		i = i + 1
861	    if i >= nbargs:
862		if not quiet:
863		    self.warning("Unable to find arg %s from function comment for %s" % (
864		       arg, name))
865	while len(lines) > 0 and lines[0] == '*':
866	    del lines[0]
867	desc = ""
868	while len(lines) > 0:
869	    l = lines[0]
870	    while len(l) > 0 and l[0] == '*':
871	        l = l[1:]
872	    l = string.strip(l)
873	    if len(l) >= 6 and (l[0:6] == "return" or l[0:6] == "Return"):
874	        try:
875		    l = string.split(l, ' ', 1)[1]
876		except:
877		    l = ""
878		retdesc = string.strip(l)
879		del lines[0]
880		while len(lines) > 0:
881		    l = lines[0]
882		    while len(l) > 0 and l[0] == '*':
883			l = l[1:]
884		    l = string.strip(l)
885		    retdesc = retdesc + " " + l
886		    del lines[0]
887	    else:
888	        desc = desc + " " + l
889		del lines[0]
890
891	retdesc = string.strip(retdesc)
892	desc = string.strip(desc)
893
894	if quiet == 0:
895	     #
896	     # report missing comments
897	     #
898	    i = 0
899	    while i < nbargs:
900	        if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
901		    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
902		i = i + 1
903	    if retdesc == "" and ret[0] != "void":
904		self.warning("Function comment for %s lacks description of return value" % (name))
905	    if desc == "":
906	        self.warning("Function comment for %s lacks description of the function" % (name))
907
908
909	return(((ret[0], retdesc), args, desc))
910
911    def parsePreproc(self, token):
912	if debug:
913	    print "=> preproc ", token, self.lexer.tokens
914        name = token[1]
915	if name == "#include":
916	    token = self.lexer.token()
917	    if token == None:
918	        return None
919	    if token[0] == 'preproc':
920		self.index_add(token[1], self.filename, not self.is_header,
921		                "include")
922		return self.lexer.token()
923	    return token
924	if name == "#define":
925	    token = self.lexer.token()
926	    if token == None:
927	        return None
928	    if token[0] == 'preproc':
929	         # TODO macros with arguments
930		name = token[1]
931	        lst = []
932		token = self.lexer.token()
933		while token != None and token[0] == 'preproc' and \
934		      token[1][0] != '#':
935		    lst.append(token[1])
936		    token = self.lexer.token()
937                try:
938		    name = string.split(name, '(') [0]
939                except:
940                    pass
941                info = self.parseMacroComment(name, not self.is_header)
942		self.index_add(name, self.filename, not self.is_header,
943		                "macro", info)
944		return token
945
946	#
947	# Processing of conditionals modified by Bill 1/1/05
948	#
949	# We process conditionals (i.e. tokens from #ifdef, #ifndef,
950	# #if, #else and #endif) for headers and mainline code,
951	# store the ones from the header in libxml2-api.xml, and later
952	# (in the routine merge_public) verify that the two (header and
953	# mainline code) agree.
954	#
955	# There is a small problem with processing the headers. Some of
956	# the variables are not concerned with enabling / disabling of
957	# library functions (e.g. '__XML_PARSER_H__'), and we don't want
958	# them to be included in libxml2-api.xml, or involved in
959	# the check between the header and the mainline code.  To
960	# accomplish this, we ignore any conditional which doesn't include
961	# the string 'ENABLED'
962	#
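	#
	# For example (illustrative): while parsing
	#     #ifdef LIBXML_XPATH_ENABLED
	# "defined(LIBXML_XPATH_ENABLED)" is appended to self.conditionals,
	# whereas a plain include guard like
	#     #ifndef __XML_PARSER_H__
	# is only recorded in self.defines and never reaches the API
	# description.
	#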
963	if name == "#ifdef":
964	    apstr = self.lexer.tokens[0][1]
965	    try:
966	        self.defines.append(apstr)
967		if string.find(apstr, 'ENABLED') != -1:
968		    self.conditionals.append("defined(%s)" % apstr)
969	    except:
970	        pass
971	elif name == "#ifndef":
972	    apstr = self.lexer.tokens[0][1]
973	    try:
974	        self.defines.append(apstr)
975		if string.find(apstr, 'ENABLED') != -1:
976		    self.conditionals.append("!defined(%s)" % apstr)
977	    except:
978	        pass
979	elif name == "#if":
980	    apstr = ""
981	    for tok in self.lexer.tokens:
982	        if apstr != "":
983		    apstr = apstr + " "
984	        apstr = apstr + tok[1]
985	    try:
986	        self.defines.append(apstr)
987		if string.find(apstr, 'ENABLED') != -1:
988		    self.conditionals.append(apstr)
989	    except:
990	        pass
991	elif name == "#else":
992	    if self.conditionals != [] and \
993	       string.find(self.defines[-1], 'ENABLED') != -1:
994	        self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
995	elif name == "#endif":
996	    if self.conditionals != [] and \
997	       string.find(self.defines[-1], 'ENABLED') != -1:
998	        self.conditionals = self.conditionals[:-1]
999	    self.defines = self.defines[:-1]
1000	token = self.lexer.token()
1001	while token != None and token[0] == 'preproc' and \
1002	    token[1][0] != '#':
1003	    token = self.lexer.token()
1004	return token
1005
1006     #
1007     # Token acquisition on top of the lexer; it handles preprocessor
1008     # directives and comments internally since they are logically not
1009     # part of the program structure.
1010     #
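     # As a result, callers of token() only ever see 'name', 'op', 'sep'
     # and 'string' tokens: comments are folded into self.comment and
     # preprocessor directives are handled by parsePreproc().
     #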
1011    def token(self):
1012        global ignored_words
1013
1014        token = self.lexer.token()
1015	while token != None:
1016	    if token[0] == 'comment':
1017		token = self.parseComment(token)
1018		continue
1019	    elif token[0] == 'preproc':
1020		token = self.parsePreproc(token)
1021		continue
1022	    elif token[0] == "name" and token[1] == "__const":
1023	        token = ("name", "const")
1024		return token
1025	    elif token[0] == "name" and token[1] == "__attribute":
1026		token = self.lexer.token()
1027		while token != None and token[1] != ";":
1028		    token = self.lexer.token()
1029		return token
1030	    elif token[0] == "name" and ignored_words.has_key(token[1]):
1031	        (n, info) = ignored_words[token[1]]
1032		i = 0
1033		while i < n:
1034		    token = self.lexer.token()
1035		    i = i + 1
1036		token = self.lexer.token()
1037		continue
1038	    else:
1039	        if debug:
1040		    print "=> ", token
1041	        return token
1042	return None
1043
1044     #
1045     # Parse a typedef; it records the type and its name.
1046     #
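     # For example (illustrative, simplified declarations):
     #     typedef struct _xmlBuffer xmlBuffer;
     # registers "xmlBuffer" as a typedef whose type is "struct _xmlBuffer",
     # while a function pointer typedef such as
     #     typedef void (*xmlFreeFunc)(void *mem);
     # is registered as a "functype" together with its parsed signature.
     #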
1047    def parseTypedef(self, token):
1048        if token == None:
1049	    return None
1050	token = self.parseType(token)
1051	if token == None:
1052	    self.error("parsing typedef")
1053	    return None
1054	base_type = self.type
1055	type = base_type
1056	 #self.debug("end typedef type", token)
1057	while token != None:
1058	    if token[0] == "name":
1059		name = token[1]
1060		signature = self.signature
1061		if signature != None:
1062		    type = string.split(type, '(')[0]
1063		    d = self.mergeFunctionComment(name,
1064			    ((type, None), signature), 1)
1065		    self.index_add(name, self.filename, not self.is_header,
1066				    "functype", d)
1067		else:
1068		    if base_type == "struct":
1069			self.index_add(name, self.filename, not self.is_header,
1070					"struct", type)
1071			base_type = "struct " + name
1072	            else:
1073			# TODO report missing or misformatted comments
1074			info = self.parseTypeComment(name, 1)
1075			self.index_add(name, self.filename, not self.is_header,
1076		                    "typedef", type, info)
1077		token = self.token()
1078	    else:
1079		self.error("parsing typedef: expecting a name")
1080		return token
1081	     #self.debug("end typedef", token)
1082	    if token != None and token[0] == 'sep' and token[1] == ',':
1083	        type = base_type
1084	        token = self.token()
1085		while token != None and token[0] == "op":
1086		    type = type + token[1]
1087		    token = self.token()
1088	    elif token != None and token[0] == 'sep' and token[1] == ';':
1089	        break;
1090	    elif token != None and token[0] == 'name':
1091	        type = base_type
1092	        continue;
1093	    else:
1094		self.error("parsing typedef: expecting ';'", token)
1095		return token
1096	token = self.token()
1097	return token
1098
1099     #
1100     # Parse a C code block, used for function bodies; it parses up to
1101     # and including the balancing }
1102     #
1103    def parseBlock(self, token):
1104        while token != None:
1105	    if token[0] == "sep" and token[1] == "{":
1106	        token = self.token()
1107		token = self.parseBlock(token)
1108	    elif token[0] == "sep" and token[1] == "}":
1109	        self.comment = None
1110	        token = self.token()
1111		return token
1112	    else:
1113	        if self.collect_ref == 1:
1114		    oldtok = token
1115		    token = self.token()
1116		    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1117		        if token[0] == "sep" and token[1] == "(":
1118			    self.index_add_ref(oldtok[1], self.filename,
1119			                        0, "function")
1120			    token = self.token()
1121			elif token[0] == "name":
1122			    token = self.token()
1123			    if token[0] == "sep" and (token[1] == ";" or
1124			       token[1] == "," or token[1] == "="):
1125				self.index_add_ref(oldtok[1], self.filename,
1126						    0, "type")
1127		    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1128			self.index_add_ref(oldtok[1], self.filename,
1129					    0, "typedef")
1130		    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1131			self.index_add_ref(oldtok[1], self.filename,
1132					    0, "typedef")
1133
1134		else:
1135		    token = self.token()
1136	return token
1137
1138     #
1139     # Parse a C struct definition till the balancing }
1140     #
1141    def parseStruct(self, token):
1142        fields = []
1143	 #self.debug("start parseStruct", token)
1144        while token != None:
1145	    if token[0] == "sep" and token[1] == "{":
1146	        token = self.token()
1147		token = self.parseTypeBlock(token)
1148	    elif token[0] == "sep" and token[1] == "}":
1149		self.struct_fields = fields
1150		 #self.debug("end parseStruct", token)
1151		 #print fields
1152	        token = self.token()
1153		return token
1154	    else:
1155	        base_type = self.type
1156		 #self.debug("before parseType", token)
1157		token = self.parseType(token)
1158		 #self.debug("after parseType", token)
1159		if token != None and token[0] == "name":
1160		    fname = token[1]
1161		    token = self.token()
1162		    if token[0] == "sep" and token[1] == ";":
1163		        self.comment = None
1164		        token = self.token()
1165			fields.append((self.type, fname, self.comment))
1166			self.comment = None
1167		    else:
1168		        self.error("parseStruct: expecting ;", token)
1169		elif token != None and token[0] == "sep" and token[1] == "{":
1170		    token = self.token()
1171		    token = self.parseTypeBlock(token)
1172		    if token != None and token[0] == "name":
1173			token = self.token()
1174		    if token != None and token[0] == "sep" and token[1] == ";":
1175			token = self.token()
1176		    else:
1177		        self.error("parseStruct: expecting ;", token)
1178		else:
1179		    self.error("parseStruct: name", token)
1180		    token = self.token()
1181		self.type = base_type;
1182        self.struct_fields = fields
1183	 #self.debug("end parseStruct", token)
1184	 #print fields
1185	return token
1186
1187     #
1188     # Parse a C enum block, up to the balancing }
1189     #
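     # For example (illustrative), parsing
     #     enum { A = 1, B, C }
     # yields self.enums == [('A', '1', ''), ('B', '2', ''), ('C', '3', '')]
     # as (name, value, comment) triples; values without an explicit '='
     # are computed by incrementing the previous one.
     #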
1190    def parseEnumBlock(self, token):
1191        self.enums = []
1192	name = None
1193	self.comment = None
1194	comment = ""
1195	value = "0"
1196        while token != None:
1197	    if token[0] == "sep" and token[1] == "{":
1198	        token = self.token()
1199		token = self.parseTypeBlock(token)
1200	    elif token[0] == "sep" and token[1] == "}":
1201		if name != None:
1202		    if self.comment != None:
1203			comment = self.comment
1204			self.comment = None
1205		    self.enums.append((name, value, comment))
1206	        token = self.token()
1207		return token
1208	    elif token[0] == "name":
1209		    if name != None:
1210			if self.comment != None:
1211			    comment = string.strip(self.comment)
1212			    self.comment = None
1213			self.enums.append((name, value, comment))
1214		    name = token[1]
1215		    comment = ""
1216		    token = self.token()
1217		    if token[0] == "op" and token[1][0] == "=":
1218		        value = ""
1219		        if len(token[1]) > 1:
1220			    value = token[1][1:]
1221		        token = self.token()
1222		        while token[0] != "sep" or (token[1] != ',' and
1223			      token[1] != '}'):
1224			    value = value + token[1]
1225			    token = self.token()
1226		    else:
1227		        try:
1228			    value = "%d" % (int(value) + 1)
1229			except:
1230			    self.warning("Failed to compute value of enum %s" % (name))
1231			    value=""
1232		    if token[0] == "sep" and token[1] == ",":
1233			token = self.token()
1234	    else:
1235	        token = self.token()
1236	return token
1237
1238     #
1239     # Parse a C definition block, used for structs; it parses up to
1240     # the balancing }
1241     #
1242    def parseTypeBlock(self, token):
1243        while token != None:
1244	    if token[0] == "sep" and token[1] == "{":
1245	        token = self.token()
1246		token = self.parseTypeBlock(token)
1247	    elif token[0] == "sep" and token[1] == "}":
1248	        token = self.token()
1249		return token
1250	    else:
1251	        token = self.token()
1252	return token
1253
1254     #
1255     # Parse a type: the fact that the type name can either occur after
1256     #    the definition or within the definition makes it a little harder;
1257     #    if inside, the name token is pushed back before returning
1258     #
1259    def parseType(self, token):
1260        self.type = ""
1261	self.struct_fields = []
1262        self.signature = None
1263	if token == None:
1264	    return token
1265
1266	while token[0] == "name" and (
1267	      token[1] == "const" or \
1268	      token[1] == "unsigned" or \
1269	      token[1] == "signed"):
1270	    if self.type == "":
1271	        self.type = token[1]
1272	    else:
1273	        self.type = self.type + " " + token[1]
1274	    token = self.token()
1275
1276        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1277	    if self.type == "":
1278	        self.type = token[1]
1279	    else:
1280	        self.type = self.type + " " + token[1]
1281	    tmp = self.token()   # look ahead for an "int" following "long"/"short"
1282	    if tmp != None and tmp[0] == "name" and tmp[1] == "int":
1283		self.type = self.type + " " + tmp[1]
1284	    elif tmp != None:
1285		self.lexer.push(tmp)
1286
1287        elif token[0] == "name" and token[1] == "struct":
1288	    if self.type == "":
1289	        self.type = token[1]
1290	    else:
1291	        self.type = self.type + " " + token[1]
1292	    token = self.token()
1293	    nametok = None
1294	    if token[0] == "name":
1295	        nametok = token
1296		token = self.token()
1297	    if token != None and token[0] == "sep" and token[1] == "{":
1298		token = self.token()
1299		token = self.parseStruct(token)
1300	    elif token != None and token[0] == "op" and token[1] == "*":
1301	        self.type = self.type + " " + nametok[1] + " *"
1302		token = self.token()
1303		while token != None and token[0] == "op" and token[1] == "*":
1304		    self.type = self.type + " *"
1305		    token = self.token()
1306		if token[0] == "name":
1307		    nametok = token
1308		    token = self.token()
1309		else:
1310		    self.error("struct : expecting name", token)
1311		    return token
1312	    elif token != None and token[0] == "name" and nametok != None:
1313	        self.type = self.type + " " + nametok[1]
1314		return token
1315
1316	    if nametok != None:
1317		self.lexer.push(token)
1318		token = nametok
1319	    return token
1320
1321        elif token[0] == "name" and token[1] == "enum":
1322	    if self.type == "":
1323	        self.type = token[1]
1324	    else:
1325	        self.type = self.type + " " + token[1]
1326	    self.enums = []
1327	    token = self.token()
1328	    if token != None and token[0] == "sep" and token[1] == "{":
1329		token = self.token()
1330		token = self.parseEnumBlock(token)
1331	    else:
1332		self.error("parsing enum: expecting '{'", token)
1333	    enum_type = None
1334	    if token != None and token[0] != "name":
1335	        self.lexer.push(token)
1336	        token = ("name", "enum")
1337	    else:
1338	        enum_type = token[1]
1339	    for enum in self.enums:
1340		self.index_add(enum[0], self.filename,
1341			       not self.is_header, "enum",
1342			       (enum[1], enum[2], enum_type))
1343	    return token
1344
1345	elif token[0] == "name":
1346	    if self.type == "":
1347	        self.type = token[1]
1348	    else:
1349	        self.type = self.type + " " + token[1]
1350	else:
1351	    self.error("parsing type %s: expecting a name" % (self.type),
1352	               token)
1353	    return token
1354	token = self.token()
1355        while token != None and (token[0] == "op" or
1356	      token[0] == "name" and token[1] == "const"):
1357	    self.type = self.type + " " + token[1]
1358	    token = self.token()
1359
1360	 #
1361	 # if there is a parenthesis here, this means a function type
1362	 #
1363	if token != None and token[0] == "sep" and token[1] == '(':
1364	    self.type = self.type + token[1]
1365	    token = self.token()
1366	    while token != None and token[0] == "op" and token[1] == '*':
1367	        self.type = self.type + token[1]
1368		token = self.token()
1369	    if token == None or token[0] != "name" :
1370		self.error("parsing function type, name expected", token);
1371	        return token
1372	    self.type = self.type + token[1]
1373	    nametok = token
1374	    token = self.token()
1375	    if token != None and token[0] == "sep" and token[1] == ')':
1376		self.type = self.type + token[1]
1377		token = self.token()
1378		if token != None and token[0] == "sep" and token[1] == '(':
1379		    token = self.token()
1380		    type = self.type;
1381		    token = self.parseSignature(token);
1382		    self.type = type;
1383		else:
1384		    self.error("parsing function type, '(' expected", token);
1385		    return token
1386	    else:
1387	        self.error("parsing function type, ')' expected", token);
1388		return token
1389	    self.lexer.push(token)
1390	    token = nametok
1391	    return token
1392
1393         #
1394	 # do some lookahead for arrays
1395	 #
1396	if token != None and token[0] == "name":
1397	    nametok = token
1398	    token = self.token()
1399	    if token != None and token[0] == "sep" and token[1] == '[':
1400	        self.type = self.type + nametok[1]
1401		while token != None and token[0] == "sep" and token[1] == '[':
1402		    self.type = self.type + token[1]
1403		    token = self.token()
1404		    while token != None and token[0] != 'sep' and \
1405		          token[1] != ']' and token[1] != ';':
1406			self.type = self.type + token[1]
1407			token = self.token()
1408		if token != None and token[0] == 'sep' and token[1] == ']':
1409		    self.type = self.type + token[1]
1410		    token = self.token()
1411		else:
1412		    self.error("parsing array type, ']' expected", token);
1413		    return token
1414	    elif token != None and token[0] == "sep" and token[1] == ':':
1415	         # skip a bit field width such as ':12'
1416		token = self.token()
1417		token = self.token()
1418	    self.lexer.push(token)
1419	    token = nametok
1420
1421	return token
1422
1423     #
1424     # Parse a signature: '(' has been parsed and we scan the type definition
1425     #    up to and including the ')'
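     # For example (illustrative parameter names), the parameter list
     #     (xmlNodePtr cur, int depth)
     # leaves self.signature == [('xmlNodePtr', 'cur', None),
     # ('int', 'depth', None)], while '(void)' produces [('void', None, None)].
     #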
1426    def parseSignature(self, token):
1427        signature = []
1428	if token != None and token[0] == "sep" and token[1] == ')':
1429	    self.signature = []
1430	    token = self.token()
1431	    return token
1432	while token != None:
1433	    token = self.parseType(token)
1434	    if token != None and token[0] == "name":
1435	        signature.append((self.type, token[1], None))
1436		token = self.token()
1437	    elif token != None and token[0] == "sep" and token[1] == ',':
1438		token = self.token()
1439		continue
1440	    elif token != None and token[0] == "sep" and token[1] == ')':
1441	         # only the type was provided
1442		if self.type == "...":
1443		    signature.append((self.type, "...", None))
1444		else:
1445		    signature.append((self.type, None, None))
1446	    if token != None and token[0] == "sep":
1447	        if token[1] == ',':
1448		    token = self.token()
1449		    continue
1450		elif token[1] == ')':
1451		    token = self.token()
1452		    break
1453	self.signature = signature
1454	return token
1455
1456     #
1457     # Parse a global definition, be it a type, variable or function;
1458     # the extern "C" blocks are a bit nasty and require recursion.
1459     #
1460    def parseGlobal(self, token):
1461        static = 0
1462        if token[1] == 'extern':
1463	    token = self.token()
1464	    if token == None:
1465	        return token
1466	    if token[0] == 'string':
1467	        if token[1] == 'C':
1468		    token = self.token()
1469		    if token == None:
1470			return token
1471		    if token[0] == 'sep' and token[1] == "{":
1472		        token = self.token()
1473#			 print 'Entering extern "C line ', self.lineno()
1474			while token != None and (token[0] != 'sep' or
1475			      token[1] != "}"):
1476			    if token[0] == 'name':
1477				token = self.parseGlobal(token)
1478			    else:
1479				self.error(
1480				 "token %s %s unexpected at the top level" % (
1481					token[0], token[1]))
1482				token = self.parseGlobal(token)
1483#			 print 'Exiting extern "C" line', self.lineno()
1484			token = self.token()
1485			return token
1486		else:
1487		    return token
1488	elif token[1] == 'static':
1489	    static = 1
1490	    token = self.token()
1491	    if token == None or  token[0] != 'name':
1492	        return token
1493
1494	if token[1] == 'typedef':
1495	    token = self.token()
1496	    return self.parseTypedef(token)
1497	else:
1498	    token = self.parseType(token)
1499	    type_orig = self.type
1500	if token == None or token[0] != "name":
1501	    return token
1502	type = type_orig
1503	self.name = token[1]
1504	token = self.token()
1505	while token != None and (token[0] == "sep" or token[0] == "op"):
1506	    if token[0] == "sep":
1507		if token[1] == "[":
1508		    type = type + token[1]
1509		    token = self.token()
1510		    while token != None and (token[0] != "sep" or \
1511		          token[1] != ";"):
1512			type = type + token[1]
1513			token = self.token()
1514
1515	    if token != None and token[0] == "op" and token[1] == "=":
1516		 #
1517		 # Skip the initialization of the variable
1518		 #
1519		token = self.token()
1520		if token[0] == 'sep' and token[1] == '{':
1521		    token = self.token()
1522		    token = self.parseBlock(token)
1523		else:
1524		    self.comment = None
1525		    while token != None and (token[0] != "sep" or \
1526			  (token[1] != ';' and token[1] != ',')):
1527			    token = self.token()
1528		self.comment = None
1529		if token == None or token[0] != "sep" or (token[1] != ';' and
1530		   token[1] != ','):
1531		    self.error("missing ';' or ',' after value")
1532
1533	    if token != None and token[0] == "sep":
1534		if token[1] == ";":
1535		    self.comment = None
1536		    token = self.token()
1537		    if type == "struct":
1538		        self.index_add(self.name, self.filename,
1539			     not self.is_header, "struct", self.struct_fields)
1540		    else:
1541			self.index_add(self.name, self.filename,
1542			     not self.is_header, "variable", type)
1543		    break
1544		elif token[1] == "(":
1545		    token = self.token()
1546		    token = self.parseSignature(token)
1547		    if token == None:
1548			return None
1549		    if token[0] == "sep" and token[1] == ";":
1550		        d = self.mergeFunctionComment(self.name,
1551				((type, None), self.signature), 1)
1552			self.index_add(self.name, self.filename, static,
1553			                "function", d)
1554			token = self.token()
1555		    elif token[0] == "sep" and token[1] == "{":
1556		        d = self.mergeFunctionComment(self.name,
1557				((type, None), self.signature), static)
1558			self.index_add(self.name, self.filename, static,
1559			                "function", d)
1560			token = self.token()
1561			token = self.parseBlock(token);
1562		elif token[1] == ',':
1563		    self.comment = None
1564		    self.index_add(self.name, self.filename, static,
1565		                    "variable", type)
1566		    type = type_orig
1567		    token = self.token()
1568		    while token != None and token[0] == "sep":
1569		        type = type + token[1]
1570			token = self.token()
1571		    if token != None and token[0] == "name":
1572		        self.name = token[1]
1573			token = self.token()
1574		else:
1575		    break
1576
1577	return token
1578
1579    def parse(self):
1580        self.warning("Parsing %s" % (self.filename))
1581        token = self.token()
1582	while token != None:
1583            if token[0] == 'name':
1584	        token = self.parseGlobal(token)
1585            else:
1586	        self.error("token %s %s unexpected at the top level" % (
1587		       token[0], token[1]))
1588		token = self.parseGlobal(token)
1589		return
1590	self.parseTopComment(self.top_comment)
1591        return self.index
1592
1593
1594class docBuilder:
1595    """A documentation builder"""
1596    def __init__(self, name, directories=['.'], excludes=[]):
1597        self.name = name
1598        self.directories = directories
1599	self.excludes = excludes + ignored_files.keys()
1600	self.modules = {}
1601	self.headers = {}
1602	self.idx = index()
1603        self.xref = {}
1604	self.index = {}
1605	if name == 'libxml2':
1606	    self.basename = 'libxml'
1607	else:
1608	    self.basename = name
1609
1610    def indexString(self, id, str):
1611	if str == None:
1612	    return
1613	str = string.replace(str, "'", ' ')
1614	str = string.replace(str, '"', ' ')
1615	str = string.replace(str, "/", ' ')
1616	str = string.replace(str, '*', ' ')
1617	str = string.replace(str, "[", ' ')
1618	str = string.replace(str, "]", ' ')
1619	str = string.replace(str, "(", ' ')
1620	str = string.replace(str, ")", ' ')
1621	str = string.replace(str, "<", ' ')
1622	str = string.replace(str, '>', ' ')
1623	str = string.replace(str, "&", ' ')
1624	str = string.replace(str, '#', ' ')
1625	str = string.replace(str, ",", ' ')
1626	str = string.replace(str, '.', ' ')
1627	str = string.replace(str, ';', ' ')
1628	tokens = string.split(str)
1629	for token in tokens:
1630	    try:
1631		c = token[0]
1632		if string.find(string.letters, c) < 0:
1633		    pass
1634		elif len(token) < 3:
1635		    pass
1636		else:
1637		    lower = string.lower(token)
1638		    # TODO: generalize this a bit
1639		    if lower == 'and' or lower == 'the':
1640			pass
1641		    elif self.xref.has_key(token):
1642			self.xref[token].append(id)
1643		    else:
1644			self.xref[token] = [id]
1645	    except:
1646		pass
1647
1648    def analyze(self):
1649        print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1650	self.idx.analyze()
1651
1652    def scanHeaders(self):
1653	for header in self.headers.keys():
1654	    parser = CParser(header)
1655	    idx = parser.parse()
1656	    self.headers[header] = idx;
1657	    self.idx.merge(idx)
1658
1659    def scanModules(self):
1660	for module in self.modules.keys():
1661	    parser = CParser(module)
1662	    idx = parser.parse()
1663	    # idx.analyze()
1664	    self.modules[module] = idx
1665	    self.idx.merge_public(idx)
1666
1667    def scan(self):
1668        for directory in self.directories:
1669	    files = glob.glob(directory + "/*.c")
1670	    for file in files:
1671	        skip = 0
1672		for excl in self.excludes:
1673		    if string.find(file, excl) != -1:
1674		        skip = 1;
1675			break
1676		if skip == 0:
1677		    self.modules[file] = None;
1678	    files = glob.glob(directory + "/*.h")
1679	    for file in files:
1680	        skip = 0
1681		for excl in self.excludes:
1682		    if string.find(file, excl) != -1:
1683		        skip = 1;
1684			break
1685		if skip == 0:
1686		    self.headers[file] = None;
1687	self.scanHeaders()
1688	self.scanModules()
1689
1690    def modulename_file(self, file):
1691        module = os.path.basename(file)
1692	if module[-2:] == '.h':
1693	    module = module[:-2]
1694	elif module[-2:] == '.c':
1695	    module = module[:-2]
1696	return module
1697
1698    def serialize_enum(self, output, name):
1699        id = self.idx.enums[name]
1700        output.write("    <enum name='%s' file='%s'" % (name,
1701	             self.modulename_file(id.header)))
1702	if id.info != None:
1703	    info = id.info
1704	    if info[0] != None and info[0] != '':
1705	        try:
1706		    val = eval(info[0])
1707		except:
1708		    val = info[0]
1709		output.write(" value='%s'" % (val));
1710	    if info[2] != None and info[2] != '':
1711		output.write(" type='%s'" % info[2]);
1712	    if info[1] != None and info[1] != '':
1713		output.write(" info='%s'" % escape(info[1]));
1714        output.write("/>\n")
1715
1716    def serialize_macro(self, output, name):
1717        id = self.idx.macros[name]
1718        output.write("    <macro name='%s' file='%s'>\n" % (name,
1719	             self.modulename_file(id.header)))
1720	if id.info != None:
1721            try:
1722		(args, desc) = id.info
1723		if desc != None and desc != "":
1724		    output.write("      <info>%s</info>\n" % (escape(desc)))
1725		    self.indexString(name, desc)
1726		for arg in args:
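		    # note: rebinds name to the argument name for the rest of the loop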
1727		    (name, desc) = arg
1728		    if desc != None and desc != "":
1729			output.write("      <arg name='%s' info='%s'/>\n" % (
1730				     name, escape(desc)))
1731			self.indexString(name, desc)
1732		    else:
1733			output.write("      <arg name='%s'/>\n" % (name))
1734            except:
1735                pass
1736        output.write("    </macro>\n")
1737
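    # 'struct xxx' typedefs are written as a <struct> (with <field> children
    # when the fields were parsed), anything else as a plain <typedef>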
1738    def serialize_typedef(self, output, name):
1739        id = self.idx.typedefs[name]
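	# debugging aid: dump the raw entry collected for the xmlChar typedef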
1740	if name == 'xmlChar':
1741	    print id
1742	if id.info[0:7] == 'struct ':
1743	    output.write("    <struct name='%s' file='%s' type='%s'" % (
1744	             name, self.modulename_file(id.header), id.info))
1745	    name = id.info[7:]
1746	    if self.idx.structs.has_key(name) and ( \
1747	       type(self.idx.structs[name].info) == type(()) or
1748		type(self.idx.structs[name].info) == type([])):
1749	        output.write(">\n");
1750		try:
1751		    for field in self.idx.structs[name].info:
1752			desc = field[2]
1753			self.indexString(name, desc)
1754			if desc == None:
1755			    desc = ''
1756			else:
1757			    desc = escape(desc)
1758			output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1759		except:
1760		    print "Failed to serialize struct %s" % (name)
1761		output.write("    </struct>\n")
1762	    else:
1763	        output.write("/>\n");
1764	else :
1765	    output.write("    <typedef name='%s' file='%s' type='%s'" % (
1766	                 name, self.modulename_file(id.header), id.info))
1767            try:
1768		desc = id.extra
1769		if desc != None and desc != "":
1770		    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
1771		    output.write("    </typedef>\n")
1772		else:
1773		    output.write("/>\n")
1774	    except:
1775		output.write("/>\n")
1776
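    # write one <variable> entry, with its type when known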
1777    def serialize_variable(self, output, name):
1778        id = self.idx.variables[name]
1779	if id.info != None:
1780	    output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
1781		    name, self.modulename_file(id.header), id.info))
1782	else:
1783	    output.write("    <variable name='%s' file='%s'/>\n" % (
1784	            name, self.modulename_file(id.header)))
1785
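    # write one function-like entry (the tag comes from id.type) with its
    # conditionals, description, return type and arguments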
1786    def serialize_function(self, output, name):
1787        id = self.idx.functions[name]
1788	if name == debugsym:
1789	    print "=>", id
1790
1791        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
1792	             name, self.modulename_file(id.header),
1793		     self.modulename_file(id.module)))
1794	#
1795	# Processing of conditionals modified by Bill 1/1/05
1796	#
1797	if id.conditionals != None:
1798	    apstr = ""
1799	    for cond in id.conditionals:
1800	        if apstr != "":
1801		    apstr = apstr + " &amp;&amp; "
1802		apstr = apstr + cond
1803	    output.write("      <cond>%s</cond>\n"% (apstr));
1804	try:
1805	    (ret, params, desc) = id.info
1806	    output.write("      <info>%s</info>\n" % (escape(desc)))
1807	    self.indexString(name, desc)
1808	    if ret[0] != None:
1809	        if ret[0] == "void":
1810		    output.write("      <return type='void'/>\n")
1811		else:
1812		    output.write("      <return type='%s' info='%s'/>\n" % (
1813			     ret[0], escape(ret[1])))
1814		    self.indexString(name, ret[1])
1815	    for param in params:
1816	        if param[0] == 'void':
1817		    continue
1818	        if param[2] == None:
1819		    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1820		else:
1821		    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1822		    self.indexString(name, param[2])
1823	except:
1824	    print "Failed to save function %s info: " % name, `id.info`
1825        output.write("    </%s>\n" % (id.type))
1826
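    # describe one header: its summary/description/author metadata and an
    # <exports> line for every symbol it provides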
1827    def serialize_exports(self, output, file):
1828        module = self.modulename_file(file)
1829	output.write("    <file name='%s'>\n" % (module))
1830	dict = self.headers[file]
1831	if dict.info != None:
1832	    for data in ('Summary', 'Description', 'Author'):
1833		try:
1834		    output.write("     <%s>%s</%s>\n" % (
1835		                 string.lower(data),
1836				 escape(dict.info[data]),
1837				 string.lower(data)))
1838		except:
1839		    print "Header %s lacks a %s description" % (module, data)
1840	    if dict.info.has_key('Description'):
1841	        desc = dict.info['Description']
1842		if string.find(desc, "DEPRECATED") != -1:
1843		    output.write("     <deprecated/>\n")
1844
1845        ids = dict.macros.keys()
1846	ids.sort()
1847	for id in uniq(ids):
1848	    # Macros are sometimes used to masquerade as other types.
1849	    if dict.functions.has_key(id):
1850	        continue
1851	    if dict.variables.has_key(id):
1852	        continue
1853	    if dict.typedefs.has_key(id):
1854	        continue
1855	    if dict.structs.has_key(id):
1856	        continue
1857	    if dict.enums.has_key(id):
1858	        continue
1859	    output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
1860        ids = dict.enums.keys()
1861	ids.sort()
1862	for id in uniq(ids):
1863	    output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
1864        ids = dict.typedefs.keys()
1865	ids.sort()
1866	for id in uniq(ids):
1867	    output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
1868        ids = dict.structs.keys()
1869	ids.sort()
1870	for id in uniq(ids):
1871	    output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
1872        ids = dict.variables.keys()
1873	ids.sort()
1874	for id in uniq(ids):
1875	    output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
1876        ids = dict.functions.keys()
1877	ids.sort()
1878	for id in uniq(ids):
1879	    output.write("     <exports symbol='%s' type='function'/>\n" % (id))
1880	output.write("    </file>\n")
1881
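    # cross-reference: the identifiers defined by each header file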
1882    def serialize_xrefs_files(self, output):
1883        headers = self.headers.keys()
1884        headers.sort()
1885        for file in headers:
1886	    module = self.modulename_file(file)
1887	    output.write("    <file name='%s'>\n" % (module))
1888	    dict = self.headers[file]
1889	    ids = uniq(dict.functions.keys() + dict.variables.keys() + \
1890		  dict.macros.keys() + dict.typedefs.keys() + \
1891		  dict.structs.keys() + dict.enums.keys())
1892	    ids.sort()
1893	    for id in ids:
1894		output.write("      <ref name='%s'/>\n" % (id))
1895	    output.write("    </file>\n")
1896        pass
1897
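    # cross-reference: functions grouped by the types of their parameters,
    # skipping trivial types such as int or char *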
1898    def serialize_xrefs_functions(self, output):
1899        funcs = {}
1900	for name in self.idx.functions.keys():
1901	    id = self.idx.functions[name]
1902	    try:
1903		(ret, params, desc) = id.info
1904		for param in params:
1905		    if param[0] == 'void':
1906			continue
1907		    if funcs.has_key(param[0]):
1908		        funcs[param[0]].append(name)
1909		    else:
1910		        funcs[param[0]] = [name]
1911	    except:
1912	        pass
1913	typ = funcs.keys()
1914	typ.sort()
1915	for type in typ:
1916	    if type == '' or type == 'void' or type == "int" or \
1917	       type == "char *" or type == "const char *" :
1918	        continue
1919	    output.write("    <type name='%s'>\n" % (type))
1920	    ids = funcs[type]
1921	    ids.sort()
1922	    pid = ''	# functions taking several parameters of the same type show up once per parameter, skip the duplicates
1923	    for id in ids:
1924	        if id != pid:
1925	            output.write("      <ref name='%s'/>\n" % (id))
1926		    pid = id
1927	    output.write("    </type>\n")
1928
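    # cross-reference: functions grouped by their return type, i.e. the calls
    # able to produce a value of a given type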
1929    def serialize_xrefs_constructors(self, output):
1930        funcs = {}
1931	for name in self.idx.functions.keys():
1932	    id = self.idx.functions[name]
1933	    try:
1934		(ret, params, desc) = id.info
1935		if ret[0] == "void":
1936		    continue
1937		if funcs.has_key(ret[0]):
1938		    funcs[ret[0]].append(name)
1939		else:
1940		    funcs[ret[0]] = [name]
1941	    except:
1942	        pass
1943	typ = funcs.keys()
1944	typ.sort()
1945	for type in typ:
1946	    if type == '' or type == 'void' or type == "int" or \
1947	       type == "char *" or type == "const char *" :
1948	        continue
1949	    output.write("    <type name='%s'>\n" % (type))
1950	    ids = funcs[type]
1951	    ids.sort()
1952	    for id in ids:
1953	        output.write("      <ref name='%s'/>\n" % (id))
1954	    output.write("    </type>\n")
1955
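    # cross-reference: alphabetical list of all identifiers, grouped by initial letter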
1956    def serialize_xrefs_alpha(self, output):
1957	letter = None
1958	ids = self.idx.identifiers.keys()
1959	ids.sort()
1960	for id in ids:
1961	    if id[0] != letter:
1962		if letter != None:
1963		    output.write("    </letter>\n")
1964		letter = id[0]
1965		output.write("    <letter name='%s'>\n" % (letter))
1966	    output.write("      <ref name='%s'/>\n" % (id))
1967	if letter != None:
1968	    output.write("    </letter>\n")
1969
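    # cross-reference: map each identifier to the HTML file and anchor documenting it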
1970    def serialize_xrefs_references(self, output):
1971        typ = self.idx.identifiers.keys()
1972	typ.sort()
1973	for id in typ:
1974	    idf = self.idx.identifiers[id]
1975	    module = idf.header
1976	    output.write("    <reference name='%s' href='%s'/>\n" % (id,
1977	                 'html/' + self.basename + '-' +
1978		         self.modulename_file(module) + '.html#' +
1979			 id))
1980
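    # cross-reference: the word index built by indexString(); words with more
    # than 30 references are skipped, the rest are grouped by letter and split
    # into <chunk> sections once about 200 references have accumulated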
1981    def serialize_xrefs_index(self, output):
1982        index = self.xref
1983	typ = index.keys()
1984	typ.sort()
1985	letter = None
1986	count = 0
1987	chunk = 0
1988	chunks = []
1989	for id in typ:
1990	    if len(index[id]) > 30:
1991		continue
1992	    if id[0] != letter:
1993		if letter == None or count > 200:
1994		    if letter != None:
1995			output.write("      </letter>\n")
1996			output.write("    </chunk>\n")
1997			count = 0
1998			chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1999		    output.write("    <chunk name='chunk%s'>\n" % (chunk))
2000		    first_letter = id[0]
2001		    chunk = chunk + 1
2002		elif letter != None:
2003		    output.write("      </letter>\n")
2004		letter = id[0]
2005		output.write("      <letter name='%s'>\n" % (letter))
2006	    output.write("        <word name='%s'>\n" % (id))
2007	    tokens = index[id];
2008	    tokens.sort()
2009	    tok = None
2010	    for token in tokens:
2011		if tok == token:
2012		    continue
2013		tok = token
2014		output.write("          <ref name='%s'/>\n" % (token))
2015		count = count + 1
2016	    output.write("        </word>\n")
2017	if letter != None:
2018	    output.write("      </letter>\n")
2019	    output.write("    </chunk>\n")
2020	    if count != 0:
2021	        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
2022	    output.write("    <chunks>\n")
2023	    for ch in chunks:
2024		output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
2025			     ch[0], ch[1], ch[2]))
2026	    output.write("    </chunks>\n")
2027
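    # write all the cross-reference sections of the -refs.xml output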
2028    def serialize_xrefs(self, output):
2029	output.write("  <references>\n")
2030	self.serialize_xrefs_references(output)
2031	output.write("  </references>\n")
2032	output.write("  <alpha>\n")
2033	self.serialize_xrefs_alpha(output)
2034	output.write("  </alpha>\n")
2035	output.write("  <constructors>\n")
2036	self.serialize_xrefs_constructors(output)
2037	output.write("  </constructors>\n")
2038	output.write("  <functions>\n")
2039	self.serialize_xrefs_functions(output)
2040	output.write("  </functions>\n")
2041	output.write("  <files>\n")
2042	self.serialize_xrefs_files(output)
2043	output.write("  </files>\n")
2044	output.write("  <index>\n")
2045	self.serialize_xrefs_index(output)
2046	output.write("  </index>\n")
2047
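    # write the <name>-api.xml API description and the <name>-refs.xml cross references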
2048    def serialize(self):
2049        filename = "%s-api.xml" % self.name
2050        print "Saving XML description %s" % (filename)
2051        output = open(filename, "w")
2052        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2053        output.write("<api name='%s'>\n" % self.name)
2054        output.write("  <files>\n")
2055        headers = self.headers.keys()
2056        headers.sort()
2057        for file in headers:
2058            self.serialize_exports(output, file)
2059        output.write("  </files>\n")
2060        output.write("  <symbols>\n")
2061        macros = self.idx.macros.keys()
2062        macros.sort()
2063        for macro in macros:
2064            self.serialize_macro(output, macro)
2065        enums = self.idx.enums.keys()
2066        enums.sort()
2067        for enum in enums:
2068            self.serialize_enum(output, enum)
2069        typedefs = self.idx.typedefs.keys()
2070        typedefs.sort()
2071        for typedef in typedefs:
2072            self.serialize_typedef(output, typedef)
2073        variables = self.idx.variables.keys()
2074        variables.sort()
2075        for variable in variables:
2076            self.serialize_variable(output, variable)
2077        functions = self.idx.functions.keys()
2078        functions.sort()
2079        for function in functions:
2080            self.serialize_function(output, function)
2081        output.write("  </symbols>\n")
2082        output.write("</api>\n")
2083        output.close()
2084
2085        filename = "%s-refs.xml" % self.name
2086        print "Saving XML Cross References %s" % (filename)
2087        output = open(filename, "w")
2088        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2089        output.write("<apirefs name='%s'>\n" % self.name)
2090        self.serialize_xrefs(output)
2091        output.write("</apirefs>\n")
2092        output.close()
2093
2094
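#
# Guess which tree (libxml2 or libxslt) the script is run from and rebuild
# its API description; libexslt is processed too when its sources are found
#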
2095def rebuild():
2096    builder = None
2097    if glob.glob("parser.c") != [] :
2098        print "Rebuilding API description for libxml2"
2099	builder = docBuilder("libxml2", [".", "."],
2100	                     ["xmlwin32version.h", "tst.c"])
2101    elif glob.glob("../parser.c") != [] :
2102        print "Rebuilding API description for libxml2"
2103	builder = docBuilder("libxml2", ["..", "../include/libxml"],
2104	                     ["xmlwin32version.h", "tst.c"])
2105    elif glob.glob("../libxslt/transform.c") != [] :
2106        print "Rebuilding API description for libxslt"
2107	builder = docBuilder("libxslt", ["../libxslt"],
2108	                     ["win32config.h", "libxslt.h", "tst.c"])
2109    else:
2110        print "rebuild() failed, unable to guess the module"
2111	return None
2112    builder.scan()
2113    builder.analyze()
2114    builder.serialize()
2115    if glob.glob("../libexslt/exslt.c") != [] :
2116        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
2117	extra.scan()
2118	extra.analyze()
2119	extra.serialize()
2120    return builder
2121
2122#
2123# for debugging the parser
2124#
2125def parse(filename):
2126    parser = CParser(filename)
2127    idx = parser.parse()
2128    return idx
2129
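#
# With a file argument, parse just that file with debug enabled, e.g.
#   python apibuild.py somefile.c
# otherwise rebuild the complete API description for the surrounding tree
#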
2130if __name__ == "__main__":
2131    if len(sys.argv) > 1:
2132        debug = 1
2133        parse(sys.argv[1])
2134    else:
2135	rebuild()
2136