apibuild.py revision 99b78502b649a03a00b0ec83288a2e7216da7a17
#!/usr/bin/python -u
#
# This is the API builder, it parses the C sources and builds the
# API formal description in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#
import os, sys
import string
import glob

debug=0
#debugsym='ignorableWhitespaceSAXFunc'
debugsym=None

#
# C parser analysis code
#
ignored_files = {
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
    "elfgcchack.h": "not a normal header",
    "testHTML.c": "test tool",
    "testReader.c": "test tool",
    "testSchemas.c": "test tool",
    "testXPath.c": "test tool",
    "testAutomata.c": "test tool",
    "testModule.c": "test tool",
    "testRegexp.c": "test tool",
    "testThreads.c": "test tool",
    "testC14N.c": "test tool",
    "testRelax.c": "test tool",
    "testThreadsWin32.c": "test tool",
    "testSAX.c": "test tool",
    "testURI.c": "test tool",
    "testapi.c": "generated regression tests",
    "runtest.c": "regression tests program",
    "runsuite.c": "regression tests program",
    "tst.c": "not part of the library",
    "testdso.c": "test for dynamic shared libraries",
}

ignored_words = {
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "XMLCDECL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
}

def escape(raw):
    raw = string.replace(raw, '&', '&amp;')
    raw = string.replace(raw, '<', '&lt;')
    raw = string.replace(raw, '>', '&gt;')
    raw = string.replace(raw, "'", '&apos;')
    raw = string.replace(raw, '"', '&quot;')
    return raw

def uniq(items):
    d = {}
    for item in items:
        d[item] = 1
    return d.keys()

class identifier:
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        if conditionals == None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]
        if self.name == debugsym:
            print "=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module != None:
            r = r + " from %s" % (self.module)
        if self.info != None:
            r = r + " " + `self.info`
        if self.extra != None:
            r = r + " " + `self.extra`
        if self.conditionals != None:
            r = r + " " + `self.conditionals`
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        if conditionals == None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        if self.name == debugsym:
            print "=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals))
        if header != None and self.header == None:
            self.set_header(module)
        if module != None and (self.module == None or self.header == self.module):
            self.set_module(module)
        if type != None and self.type == None:
            self.set_type(type)
        if info != None:
            self.set_info(info)
        if extra != None:
            self.set_extra(extra)
        if conditionals != None:
            self.set_conditionals(conditionals)

class index:
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        if name[0:2] == '__':
            return None
        d = None
        try:
            d = self.identifiers[name]
            d.update(header, module, type, lineno, info, extra, conditionals)
        except:
            d = identifier(name, header, module, type, lineno, info, extra, conditionals)
            self.identifiers[name] = d

        if d != None and static == 1:
            d.set_static(1)

        if d != None and name != None and type != None:
            self.references[name] = d

        if name == debugsym:
            print "New ref: %s" % (d)

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        if name[0:2] == '__':
            return None
        d = None
        try:
            d = self.identifiers[name]
            d.update(header, module, type, lineno, info, extra, conditionals)
        except:
            d = identifier(name, header, module, type, lineno, info, extra, conditionals)
            self.identifiers[name] = d

        if d != None and static == 1:
            d.set_static(1)

        if d != None and name != None and type != None:
            if type == "function":
                self.functions[name] = d
            elif type == "functype":
                self.functions[name] = d
            elif type == "variable":
                self.variables[name] = d
            elif type == "include":
                self.includes[name] = d
            elif type == "struct":
                self.structs[name] = d
            elif type == "enum":
                self.enums[name] = d
            elif type == "typedef":
                self.typedefs[name] = d
            elif type == "macro":
                self.macros[name] = d
            else:
                print "Unable to register type ", type

        if name == debugsym:
            print "New symbol: %s" % (d)

        return d

    def merge(self, idx):
        for id in idx.functions.keys():
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if self.macros.has_key(id):
                del self.macros[id]
            if self.functions.has_key(id):
                print "function %s from %s redeclared in %s" % (
                    id, self.functions[id].header, idx.functions[id].header)
            else:
                self.functions[id] = idx.functions[id]
                self.identifiers[id] = idx.functions[id]
        for id in idx.variables.keys():
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if self.macros.has_key(id):
                del self.macros[id]
            if self.variables.has_key(id):
                print "variable %s from %s redeclared in %s" % (
                    id, self.variables[id].header, idx.variables[id].header)
            else:
                self.variables[id] = idx.variables[id]
                self.identifiers[id] = idx.variables[id]
        for id in idx.structs.keys():
            if self.structs.has_key(id):
                print "struct %s from %s redeclared in %s" % (
                    id, self.structs[id].header, idx.structs[id].header)
            else:
                self.structs[id] = idx.structs[id]
                self.identifiers[id] = idx.structs[id]
        for id in idx.typedefs.keys():
            if self.typedefs.has_key(id):
                print "typedef %s from %s redeclared in %s" % (
                    id, self.typedefs[id].header, idx.typedefs[id].header)
            else:
                self.typedefs[id] = idx.typedefs[id]
                self.identifiers[id] = idx.typedefs[id]
        for id in idx.macros.keys():
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if self.variables.has_key(id):
                continue
            if self.functions.has_key(id):
                continue
            if self.enums.has_key(id):
                continue
            if self.macros.has_key(id):
                print "macro %s from %s redeclared in %s" % (
                    id, self.macros[id].header, idx.macros[id].header)
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        for id in idx.enums.keys():
            if self.enums.has_key(id):
                print "enum %s from %s redeclared in %s" % (
                    id, self.enums[id].header, idx.enums[id].header)
            else:
                self.enums[id] = idx.enums[id]
                self.identifiers[id] = idx.enums[id]

    def merge_public(self, idx):
        for id in idx.functions.keys():
            if self.functions.has_key(id):
                # check that function condition agrees with header
                if idx.functions[id].conditionals != \
                   self.functions[id].conditionals:
                    print "Header condition differs from Function for %s:" \
                          % id
                    print "  H: %s" % self.functions[id].conditionals
                    print "  C: %s" % idx.functions[id].conditionals
                up = idx.functions[id]
                self.functions[id].update(None, up.module, up.type, up.info, up.extra)
        #     else:
        #         print "Function %s from %s is not declared in headers" % (
        #               id, idx.functions[id].module)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        count = 0
        public = 0
        for name in dict.keys():
            id = dict[name]
            count = count + 1
            if id.static == 0:
                public = public + 1
        if count != public:
            print "  %d %s , %d public" % (count, type, public)
        elif count != 0:
            print "  %d public %s" % (count, type)


    def analyze(self):
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)

class CLexer:
    """A lexer for the C language, tokenizing the input by reading and
       analyzing it line by line"""
    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0

    def getline(self):
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = string.lstrip(line)
            line = string.rstrip(line)
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = string.lstrip(n)
                n = string.rstrip(n)
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        self.tokens.insert(0, token);

    def debug(self):
        print "Last token: ", self.last
        print "Token queue: ", self.tokens
        print "Line %d end: " % (self.lineno), self.line

    def token(self):
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line == None:
                return None

            if line[0] == '#':
                self.tokens = map((lambda x: ('preproc', x)),
                                  string.split(line))
                break;
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            l = i
                            found = 1
                            break
                        if line[i] == '\\':
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line == None:
                            return None
                self.last = ('string', tok)
                return self.last

            if l >= 2 and line[0] == '/' and line[1] == '*':
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            line = line[:i-1]
                            l = i
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line == None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last
            i = 0
            while i < l:
                if line[i] == '/' and i+1 < l and line[i+1] == '/':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '/' and i+1 < l and line[i+1] == '*':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1
            l = len(line)
            i = 0
            while i < l:
                if line[i] == ' ' or line[i] == '\t':
                    i = i + 1
                    continue
                o = ord(line[i])
                if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                   (o >= 48 and o <= 57):
                    s = i
                    while i < l:
                        o = ord(line[i])
                        if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                           (o >= 48 and o <= 57) or string.find(
                               " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
                            i = i + 1
                        else:
                            break
                    self.tokens.append(('name', line[s:i]))
                    continue
                if string.find("(){}:;,[]", line[i]) != -1:
#                    if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
#                       line[i] == '}' or line[i] == ':' or line[i] == ';' or \
#                       line[i] == ',' or line[i] == '[' or line[i] == ']':
                    self.tokens.append(('sep', line[i]))
                    i = i + 1
                    continue
                if string.find("+-*><=/%&!|.", line[i]) != -1:
#                    if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
#                       line[i] == '>' or line[i] == '<' or line[i] == '=' or \
#                       line[i] == '/' or line[i] == '%' or line[i] == '&' or \
#                       line[i] == '!' or line[i] == '|' or line[i] == '.':
                    if line[i] == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    j = i + 1
                    if j < l and (
                       string.find("+-*><=/%&!|", line[j]) != -1):
#                       line[j] == '+' or line[j] == '-' or line[j] == '*' or \
#                       line[j] == '>' or line[j] == '<' or line[j] == '=' or \
#                       line[j] == '/' or line[j] == '%' or line[j] == '&' or \
#                       line[j] == '!' or line[j] == '|'):
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', line[i]))
                        i = i + 1
                    continue
                s = i
                while i < l:
                    o = ord(line[i])
                    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                       (o >= 48 and o <= 57) or (
                        string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
#                        line[i] != ' ' and line[i] != '\t' and
#                        line[i] != '(' and line[i] != ')' and
#                        line[i] != '{' and line[i] != '}' and
#                        line[i] != ':' and line[i] != ';' and
#                        line[i] != ',' and line[i] != '+' and
#                        line[i] != '-' and line[i] != '*' and
#                        line[i] != '/' and line[i] != '%' and
#                        line[i] != '&' and line[i] != '!' and
#                        line[i] != '|' and line[i] != '[' and
#                        line[i] != ']' and line[i] != '=' and
#                        line[i] != '*' and line[i] != '>' and
#                        line[i] != '<'):
                        i = i + 1
                    else:
                        break
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok

class CParser:
    """The C module parser"""
    def __init__(self, filename, idx = None):
        self.filename = filename
        if len(filename) > 2 and filename[-2:] == '.h':
            self.is_header = 1
        else:
            self.is_header = 0
        self.input = open(filename)
        self.lexer = CLexer(self.input)
        if idx == None:
            self.index = index()
        else:
            self.index = idx
        self.top_comment = ""
        self.last_comment = ""
        self.comment = None
        self.collect_ref = 0
        self.no_error = 0
        self.conditionals = []
        self.defines = []

    def collect_references(self):
        self.collect_ref = 1

    def stop_error(self):
        self.no_error = 1

    def start_error(self):
        self.no_error = 0

    def lineno(self):
        return self.lexer.getlineno()

    def index_add(self, name, module, static, type, info=None, extra = None):
        if self.is_header == 1:
            self.index.add(name, module, module, static, type, self.lineno(),
                           info, extra, self.conditionals)
        else:
            self.index.add(name, None, module, static, type, self.lineno(),
                           info, extra, self.conditionals)

    def index_add_ref(self, name, module, static, type, info=None,
                      extra = None):
        if self.is_header == 1:
            self.index.add_ref(name, module, module, static, type,
                               self.lineno(), info, extra, self.conditionals)
        else:
            self.index.add_ref(name, None, module, static, type, self.lineno(),
                               info, extra, self.conditionals)

    def warning(self, msg):
        if self.no_error:
            return
        print msg

    def error(self, msg, token=-1):
        if self.no_error:
            return

        print "Parse Error: " + msg
        if token != -1:
            print "Got token ", token
        self.lexer.debug()
        sys.exit(1)

    def debug(self, msg, token=-1):
        print "Debug: " + msg
        if token != -1:
            print "Got token ", token
        self.lexer.debug()

    def parseTopComment(self, comment):
        res = {}
        lines = string.split(comment, "\n")
        item = None
        for line in lines:
            while line != "" and (line[0] == ' ' or line[0] == '\t'):
                line = line[1:]
            while line != "" and line[0] == '*':
                line = line[1:]
            while line != "" and (line[0] == ' ' or line[0] == '\t'):
                line = line[1:]
            try:
                (it, line) = string.split(line, ":", 1)
                item = it
                while line != "" and (line[0] == ' ' or line[0] == '\t'):
                    line = line[1:]
                if res.has_key(item):
                    res[item] = res[item] + " " + line
                else:
                    res[item] = line
            except:
                if item != None:
                    if res.has_key(item):
                        res[item] = res[item] + " " + line
                    else:
                        res[item] = line
        self.index.info = res

    def parseComment(self, token):
        if self.top_comment == "":
            self.top_comment = token[1]
        if self.comment == None or token[1][0] == '*':
            self.comment = token[1];
        else:
            self.comment = self.comment + token[1]
        token = self.lexer.token()

        if string.find(self.comment, "DOC_DISABLE") != -1:
            self.stop_error()

        if string.find(self.comment, "DOC_ENABLE") != -1:
            self.start_error()

        return token

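    #
    # Note (illustrative, not from the original sources): parseTopComment()
    # above splits the first comment block of a module into "item: text"
    # entries; serialize_exports() later looks up the 'Summary', 'Description'
    # and 'Author' items.  A header is therefore expected to start with a
    # block roughly shaped like:
    #
    #     /*
    #      * Summary: short one-line summary of the module
    #      * Description: longer description, possibly
    #      *              spanning several lines
    #      * Author: name of the author
    #      */
    #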
    #
    # Parse a comment block associated with a typedef
    #
    def parseTypeComment(self, name, quiet = 0):
        if name[0:2] == '__':
            quiet = 1

        args = []
        desc = ""

        if self.comment == None:
            if not quiet:
                self.warning("Missing comment for type %s" % (name))
            return((args, desc))
        if self.comment[0] != '*':
            if not quiet:
                self.warning("Missing * in type comment for %s" % (name))
            return((args, desc))
        lines = string.split(self.comment, '\n')
        if lines[0] == '*':
            del lines[0]
        if lines[0] != "* %s:" % (name):
            if not quiet:
                self.warning("Misformatted type comment for %s" % (name))
                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
            return((args, desc))
        del lines[0]
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
        desc = ""
        while len(lines) > 0:
            l = lines[0]
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            l = string.strip(l)
            desc = desc + " " + l
            del lines[0]

        desc = string.strip(desc)

        if quiet == 0:
            if desc == "":
                self.warning("Type comment for %s lacks a description" % (name))

        return(desc)
    #
    # Parse a comment block associated with a macro
    #
    def parseMacroComment(self, name, quiet = 0):
        if name[0:2] == '__':
            quiet = 1

        args = []
        desc = ""

        if self.comment == None:
            if not quiet:
                self.warning("Missing comment for macro %s" % (name))
            return((args, desc))
        if self.comment[0] != '*':
            if not quiet:
                self.warning("Missing * in macro comment for %s" % (name))
            return((args, desc))
        lines = string.split(self.comment, '\n')
        if lines[0] == '*':
            del lines[0]
        if lines[0] != "* %s:" % (name):
            if not quiet:
                self.warning("Misformatted macro comment for %s" % (name))
                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
            return((args, desc))
        del lines[0]
        while lines[0] == '*':
            del lines[0]
        while len(lines) > 0 and lines[0][0:3] == '* @':
            l = lines[0][3:]
            try:
                (arg, desc) = string.split(l, ':', 1)
                desc = string.strip(desc)
                arg = string.strip(arg)
            except:
                if not quiet:
                    self.warning("Misformatted macro comment for %s" % (name))
                    self.warning("  problem with '%s'" % (lines[0]))
                del lines[0]
                continue
            del lines[0]
            l = string.strip(lines[0])
            while len(l) > 2 and l[0:3] != '* @':
                while l[0] == '*':
                    l = l[1:]
                desc = desc + ' ' + string.strip(l)
                del lines[0]
                if len(lines) == 0:
                    break
                l = lines[0]
            args.append((arg, desc))
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
        desc = ""
        while len(lines) > 0:
            l = lines[0]
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            l = string.strip(l)
            desc = desc + " " + l
            del lines[0]

        desc = string.strip(desc)

        if quiet == 0:
            if desc == "":
                self.warning("Macro comment for %s lacks a description of the macro" % (name))

        return((args, desc))

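    #
    # Illustrative example (not from the original file): the comment shape
    # that parseMacroComment() above and mergeFunctionComment() below expect
    # is the usual libxml2 one, i.e. a block starting with the symbol name,
    # one "@arg:" line per parameter and an optional "Returns" sentence:
    #
    #     /**
    #      * xmlDoSomething:
    #      * @ctxt:  an XML parser context
    #      * @name:  the element name
    #      *
    #      * Short description of what the function or macro does.
    #      *
    #      * Returns 0 in case of success, -1 in case of error.
    #      */
    #
    # xmlDoSomething, @ctxt and @name above are made-up placeholders.
    #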
    #
    # Parse a comment block and merge the information found in the
    # parameter descriptions, finally returning a block as complete
    # as possible
    #
    def mergeFunctionComment(self, name, description, quiet = 0):
        if name == 'main':
            quiet = 1
        if name[0:2] == '__':
            quiet = 1

        (ret, args) = description
        desc = ""
        retdesc = ""

        if self.comment == None:
            if not quiet:
                self.warning("Missing comment for function %s" % (name))
            return(((ret[0], retdesc), args, desc))
        if self.comment[0] != '*':
            if not quiet:
                self.warning("Missing * in function comment for %s" % (name))
            return(((ret[0], retdesc), args, desc))
        lines = string.split(self.comment, '\n')
        if lines[0] == '*':
            del lines[0]
        if lines[0] != "* %s:" % (name):
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
            return(((ret[0], retdesc), args, desc))
        del lines[0]
        while lines[0] == '*':
            del lines[0]
        nbargs = len(args)
        while len(lines) > 0 and lines[0][0:3] == '* @':
            l = lines[0][3:]
            try:
                (arg, desc) = string.split(l, ':', 1)
                desc = string.strip(desc)
                arg = string.strip(arg)
            except:
                if not quiet:
                    self.warning("Misformatted function comment for %s" % (name))
                    self.warning("  problem with '%s'" % (lines[0]))
                del lines[0]
                continue
            del lines[0]
            l = string.strip(lines[0])
            while len(l) > 2 and l[0:3] != '* @':
                while l[0] == '*':
                    l = l[1:]
                desc = desc + ' ' + string.strip(l)
                del lines[0]
                if len(lines) == 0:
                    break
                l = lines[0]
            i = 0
            while i < nbargs:
                if args[i][1] == arg:
                    args[i] = (args[i][0], arg, desc)
                    break;
                i = i + 1
            if i >= nbargs:
                if not quiet:
                    self.warning("Unable to find arg %s from function comment for %s" % (
                        arg, name))
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
        desc = ""
        while len(lines) > 0:
            l = lines[0]
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            l = string.strip(l)
            if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
                try:
                    l = string.split(l, ' ', 1)[1]
                except:
                    l = ""
                retdesc = string.strip(l)
                del lines[0]
                while len(lines) > 0:
                    l = lines[0]
                    while len(l) > 0 and l[0] == '*':
                        l = l[1:]
                    l = string.strip(l)
                    retdesc = retdesc + " " + l
                    del lines[0]
            else:
                desc = desc + " " + l
                del lines[0]

        retdesc = string.strip(retdesc)
        desc = string.strip(desc)

        if quiet == 0:
            #
            # report missing comments
            #
            i = 0
            while i < nbargs:
                if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
                i = i + 1
            if retdesc == "" and ret[0] != "void":
                self.warning("Function comment for %s lacks description of return value" % (name))
            if desc == "":
                self.warning("Function comment for %s lacks description of the function" % (name))


        return(((ret[0], retdesc), args, desc))

    def parsePreproc(self, token):
        if debug:
            print "=> preproc ", token, self.lexer.tokens
        name = token[1]
        if name == "#include":
            token = self.lexer.token()
            if token == None:
                return None
            if token[0] == 'preproc':
                self.index_add(token[1], self.filename, not self.is_header,
                               "include")
                return self.lexer.token()
            return token
        if name == "#define":
            token = self.lexer.token()
            if token == None:
                return None
            if token[0] == 'preproc':
                # TODO macros with arguments
                name = token[1]
                lst = []
                token = self.lexer.token()
                while token != None and token[0] == 'preproc' and \
                      token[1][0] != '#':
                    lst.append(token[1])
                    token = self.lexer.token()
                try:
                    name = string.split(name, '(') [0]
                except:
                    pass
                info = self.parseMacroComment(name, not self.is_header)
                self.index_add(name, self.filename, not self.is_header,
                               "macro", info)
                return token

        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
        # #if, #else and #endif) for headers and mainline code,
        # store the ones from the header in libxml2-api.xml, and later
        # (in the routine merge_public) verify that the two (header and
        # mainline code) agree.
        #
        # There is a small problem with processing the headers. Some of
        # the variables are not concerned with enabling / disabling of
        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
        # them to be included in libxml2-api.xml, or involved in
        # the check between the header and the mainline code. To
        # accomplish this, we ignore any conditional which doesn't include
        # the string 'ENABLED'
        #
        if name == "#ifdef":
            apstr = self.lexer.tokens[0][1]
            try:
                self.defines.append(apstr)
                if string.find(apstr, 'ENABLED') != -1:
                    self.conditionals.append("defined(%s)" % apstr)
            except:
                pass
        elif name == "#ifndef":
            apstr = self.lexer.tokens[0][1]
            try:
                self.defines.append(apstr)
                if string.find(apstr, 'ENABLED') != -1:
                    self.conditionals.append("!defined(%s)" % apstr)
            except:
                pass
        elif name == "#if":
            apstr = ""
            for tok in self.lexer.tokens:
                if apstr != "":
                    apstr = apstr + " "
                apstr = apstr + tok[1]
            try:
                self.defines.append(apstr)
                if string.find(apstr, 'ENABLED') != -1:
                    self.conditionals.append(apstr)
            except:
                pass
        elif name == "#else":
            if self.conditionals != [] and \
               string.find(self.defines[-1], 'ENABLED') != -1:
                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
        elif name == "#endif":
            if self.conditionals != [] and \
               string.find(self.defines[-1], 'ENABLED') != -1:
                self.conditionals = self.conditionals[:-1]
            self.defines = self.defines[:-1]
        token = self.lexer.token()
        while token != None and token[0] == 'preproc' and \
              token[1][0] != '#':
            token = self.lexer.token()
        return token

    #
    # token acquisition on top of the lexer, it handles internally
    # preprocessor directives and comments since they are logically not
    # part of the program structure.
    #
    def token(self):
        global ignored_words

        token = self.lexer.token()
        while token != None:
            if token[0] == 'comment':
                token = self.parseComment(token)
                continue
            elif token[0] == 'preproc':
                token = self.parsePreproc(token)
                continue
            elif token[0] == "name" and token[1] == "__const":
                token = ("name", "const")
                return token
            elif token[0] == "name" and token[1] == "__attribute":
                token = self.lexer.token()
                while token != None and token[1] != ";":
                    token = self.lexer.token()
                return token
            elif token[0] == "name" and ignored_words.has_key(token[1]):
                (n, info) = ignored_words[token[1]]
                i = 0
                while i < n:
                    token = self.lexer.token()
                    i = i + 1
                token = self.lexer.token()
                continue
            else:
                if debug:
                    print "=> ", token
                return token
        return None

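    #
    # Illustrative note (not part of the original sources): with the
    # conditional handling in parsePreproc() above, a declaration guarded by
    #
    #     #ifdef LIBXML_XPATH_ENABLED
    #
    # is recorded with the conditional string "defined(LIBXML_XPATH_ENABLED)",
    # which serialize_function() later emits as a <cond> element, while a
    # plain include guard such as __XML_PARSER_H__ is ignored because it does
    # not contain the string 'ENABLED'.
    #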
    #
    # Parse a typedef, it records the type and its name.
    #
    def parseTypedef(self, token):
        if token == None:
            return None
        token = self.parseType(token)
        if token == None:
            self.error("parsing typedef")
            return None
        base_type = self.type
        type = base_type
        #self.debug("end typedef type", token)
        while token != None:
            if token[0] == "name":
                name = token[1]
                signature = self.signature
                if signature != None:
                    type = string.split(type, '(')[0]
                    d = self.mergeFunctionComment(name,
                                                  ((type, None), signature), 1)
                    self.index_add(name, self.filename, not self.is_header,
                                   "functype", d)
                else:
                    if base_type == "struct":
                        self.index_add(name, self.filename, not self.is_header,
                                       "struct", type)
                        base_type = "struct " + name
                    else:
                        # TODO report missing or misformatted comments
                        info = self.parseTypeComment(name, 1)
                        self.index_add(name, self.filename, not self.is_header,
                                       "typedef", type, info)
                token = self.token()
            else:
                self.error("parsing typedef: expecting a name")
                return token
            #self.debug("end typedef", token)
            if token != None and token[0] == 'sep' and token[1] == ',':
                type = base_type
                token = self.token()
                while token != None and token[0] == "op":
                    type = type + token[1]
                    token = self.token()
            elif token != None and token[0] == 'sep' and token[1] == ';':
                break;
            elif token != None and token[0] == 'name':
                type = base_type
                continue;
            else:
                self.error("parsing typedef: expecting ';'", token)
                return token
        token = self.token()
        return token

    #
    # Parse a C code block, used for functions; it parses till
    # the balancing } included
    #
    def parseBlock(self, token):
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.comment = None
                token = self.token()
                return token
            else:
                if self.collect_ref == 1:
                    oldtok = token
                    token = self.token()
                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
                        if token[0] == "sep" and token[1] == "(":
                            self.index_add_ref(oldtok[1], self.filename,
                                               0, "function")
                            token = self.token()
                        elif token[0] == "name":
                            token = self.token()
                            if token[0] == "sep" and (token[1] == ";" or
                               token[1] == "," or token[1] == "="):
                                self.index_add_ref(oldtok[1], self.filename,
                                                   0, "type")
                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
                        self.index_add_ref(oldtok[1], self.filename,
                                           0, "typedef")
                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
                        self.index_add_ref(oldtok[1], self.filename,
                                           0, "typedef")

                else:
                    token = self.token()
        return token

    #
    # Parse a C struct definition till the balancing }
    #
    def parseStruct(self, token):
        fields = []
        #self.debug("start parseStruct", token)
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.struct_fields = fields
                #self.debug("end parseStruct", token)
                #print fields
                token = self.token()
                return token
            else:
                base_type = self.type
                #self.debug("before parseType", token)
                token = self.parseType(token)
                #self.debug("after parseType", token)
                if token != None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    if token[0] == "sep" and token[1] == ";":
                        self.comment = None
                        token = self.token()
                        fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseStruct: expecting ;", token)
                elif token != None and token[0] == "sep" and token[1] == "{":
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token != None and token[0] == "name":
                        token = self.token()
                    if token != None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseStruct: expecting ;", token)
                else:
                    self.error("parseStruct: name", token)
                    token = self.token()
                self.type = base_type;
        self.struct_fields = fields
        #self.debug("end parseStruct", token)
        #print fields
        return token

    #
    # Parse a C enum block, parse till the balancing }
    #
    def parseEnumBlock(self, token):
        self.enums = []
        name = None
        self.comment = None
        comment = ""
        value = "0"
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                if name != None:
                    if self.comment != None:
                        comment = self.comment
                        self.comment = None
                    self.enums.append((name, value, comment))
                token = self.token()
                return token
            elif token[0] == "name":
                if name != None:
                    if self.comment != None:
                        comment = string.strip(self.comment)
                        self.comment = None
                    self.enums.append((name, value, comment))
                name = token[1]
                comment = ""
                token = self.token()
                if token[0] == "op" and token[1][0] == "=":
                    value = ""
                    if len(token[1]) > 1:
                        value = token[1][1:]
                    token = self.token()
                    while token[0] != "sep" or (token[1] != ',' and
                          token[1] != '}'):
                        value = value + token[1]
                        token = self.token()
                else:
                    try:
                        value = "%d" % (int(value) + 1)
                    except:
                        self.warning("Failed to compute value of enum %s" % (name))
                        value = ""
                if token[0] == "sep" and token[1] == ",":
                    token = self.token()
            else:
                token = self.token()
        return token

    #
    # Parse a C definition block, used for structs; it parses till
    # the balancing }
    #
    def parseTypeBlock(self, token):
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                token = self.token()
                return token
            else:
                token = self.token()
        return token

    #
    # Parse a type: the fact that the type name can either occur after
    # the definition or within the definition makes it a little harder;
    # if inside, the name token is pushed back before returning
    #
    def parseType(self, token):
        self.type = ""
        self.struct_fields = []
        self.signature = None
        if token == None:
            return token

        while token[0] == "name" and (
              token[1] == "const" or \
              token[1] == "unsigned" or \
              token[1] == "signed"):
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            token = self.token()

        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            if token[0] == "name" and token[1] == "int":
                if self.type == "":
                    self.type = token[1]
                else:
                    self.type = self.type + " " + token[1]

        elif token[0] == "name" and token[1] == "struct":
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            token = self.token()
            nametok = None
            if token[0] == "name":
                nametok = token
                token = self.token()
            if token != None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseStruct(token)
            elif token != None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " " + nametok[1] + " *"
                token = self.token()
                while token != None and token[0] == "op" and token[1] == "*":
                    self.type = self.type + " *"
                    token = self.token()
                if token[0] == "name":
                    nametok = token
                    token = self.token()
                else:
                    self.error("struct : expecting name", token)
                    return token
            elif token != None and token[0] == "name" and nametok != None:
                self.type = self.type + " " + nametok[1]
                return token

            if nametok != None:
                self.lexer.push(token)
                token = nametok
            return token

        elif token[0] == "name" and token[1] == "enum":
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            self.enums = []
            token = self.token()
            if token != None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseEnumBlock(token)
            else:
                self.error("parsing enum: expecting '{'", token)
            enum_type = None
            if token != None and token[0] != "name":
                self.lexer.push(token)
                token = ("name", "enum")
            else:
                enum_type = token[1]
            for enum in self.enums:
                self.index_add(enum[0], self.filename,
                               not self.is_header, "enum",
                               (enum[1], enum[2], enum_type))
            return token

        elif token[0] == "name":
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
        else:
            self.error("parsing type %s: expecting a name" % (self.type),
                       token)
            return token
        token = self.token()
        while token != None and (token[0] == "op" or
              token[0] == "name" and token[1] == "const"):
            self.type = self.type + " " + token[1]
            token = self.token()

        #
        # if there is a parenthesis here, this means a function type
        #
        if token != None and token[0] == "sep" and token[1] == '(':
            self.type = self.type + token[1]
            token = self.token()
            while token != None and token[0] == "op" and token[1] == '*':
                self.type = self.type + token[1]
                token = self.token()
            if token == None or token[0] != "name" :
                self.error("parsing function type, name expected", token);
                return token
            self.type = self.type + token[1]
            nametok = token
            token = self.token()
            if token != None and token[0] == "sep" and token[1] == ')':
                self.type = self.type + token[1]
                token = self.token()
                if token != None and token[0] == "sep" and token[1] == '(':
                    token = self.token()
                    type = self.type;
                    token = self.parseSignature(token);
                    self.type = type;
                else:
                    self.error("parsing function type, '(' expected", token);
                    return token
            else:
                self.error("parsing function type, ')' expected", token);
                return token
            self.lexer.push(token)
            token = nametok
            return token

        #
        # do some lookahead for arrays
        #
        if token != None and token[0] == "name":
            nametok = token
            token = self.token()
            if token != None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + nametok[1]
                while token != None and token[0] == "sep" and token[1] == '[':
                    self.type = self.type + token[1]
                    token = self.token()
                    while token != None and token[0] != 'sep' and \
                          token[1] != ']' and token[1] != ';':
                        self.type = self.type + token[1]
                        token = self.token()
                    if token != None and token[0] == 'sep' and token[1] == ']':
                        self.type = self.type + token[1]
                        token = self.token()
                    else:
                        self.error("parsing array type, ']' expected", token);
                        return token
            elif token != None and token[0] == "sep" and token[1] == ':':
                # remove :12 in case it's a limited int size
                token = self.token()
                token = self.token()
            self.lexer.push(token)
            token = nametok

        return token

    #
    # Parse a signature: '(' has been parsed and we scan the type definition
    # up to the ')' included
    def parseSignature(self, token):
        signature = []
        if token != None and token[0] == "sep" and token[1] == ')':
            self.signature = []
            token = self.token()
            return token
        while token != None:
            token = self.parseType(token)
            if token != None and token[0] == "name":
                signature.append((self.type, token[1], None))
                token = self.token()
            elif token != None and token[0] == "sep" and token[1] == ',':
                token = self.token()
                continue
            elif token != None and token[0] == "sep" and token[1] == ')':
                # only the type was provided
                if self.type == "...":
                    signature.append((self.type, "...", None))
                else:
                    signature.append((self.type, None, None))
            if token != None and token[0] == "sep":
                if token[1] == ',':
                    token = self.token()
                    continue
                elif token[1] == ')':
                    token = self.token()
                    break
        self.signature = signature
        return token

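    #
    # Note (added for clarity, not in the original sources): parseSignature()
    # above leaves self.signature as a list of (type, name, description)
    # tuples, one per parameter, e.g. roughly
    #
    #     [('xmlNodePtr', 'parent', None), ('const xmlChar *', 'name', None)]
    #
    # with the description slot filled in later by mergeFunctionComment().
    #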
    #
    # Parse a global definition, be it a type, variable or function
    # the extern "C" blocks are a bit nasty and require it to recurse.
    #
    def parseGlobal(self, token):
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token == None:
                return token
            if token[0] == 'string':
                if token[1] == 'C':
                    token = self.token()
                    if token == None:
                        return token
                    if token[0] == 'sep' and token[1] == "{":
                        token = self.token()
#                        print 'Entering extern "C line ', self.lineno()
                        while token != None and (token[0] != 'sep' or
                              token[1] != "}"):
                            if token[0] == 'name':
                                token = self.parseGlobal(token)
                            else:
                                self.error(
                                    "token %s %s unexpected at the top level" % (
                                        token[0], token[1]))
                                token = self.parseGlobal(token)
#                        print 'Exiting extern "C" line', self.lineno()
                        token = self.token()
                    return token
                else:
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token == None or token[0] != 'name':
                return token

        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)
        else:
            token = self.parseType(token)
            type_orig = self.type
            if token == None or token[0] != "name":
                return token
            type = type_orig
            self.name = token[1]
            token = self.token()
            while token != None and (token[0] == "sep" or token[0] == "op"):
                if token[0] == "sep":
                    if token[1] == "[":
                        type = type + token[1]
                        token = self.token()
                        while token != None and (token[0] != "sep" or \
                              token[1] != ";"):
                            type = type + token[1]
                            token = self.token()

                if token != None and token[0] == "op" and token[1] == "=":
                    #
                    # Skip the initialization of the variable
                    #
                    token = self.token()
                    if token[0] == 'sep' and token[1] == '{':
                        token = self.token()
                        token = self.parseBlock(token)
                    else:
                        self.comment = None
                        while token != None and (token[0] != "sep" or \
                              (token[1] != ';' and token[1] != ',')):
                            token = self.token()
                    self.comment = None
                    if token == None or token[0] != "sep" or (token[1] != ';' and
                       token[1] != ','):
                        self.error("missing ';' or ',' after value")

                if token != None and token[0] == "sep":
                    if token[1] == ";":
                        self.comment = None
                        token = self.token()
                        if type == "struct":
                            self.index_add(self.name, self.filename,
                                           not self.is_header, "struct",
                                           self.struct_fields)
                        else:
                            self.index_add(self.name, self.filename,
                                           not self.is_header, "variable", type)
                        break
                    elif token[1] == "(":
                        token = self.token()
                        token = self.parseSignature(token)
                        if token == None:
                            return None
                        if token[0] == "sep" and token[1] == ";":
                            d = self.mergeFunctionComment(self.name,
                                    ((type, None), self.signature), 1)
                            self.index_add(self.name, self.filename, static,
                                           "function", d)
                            token = self.token()
                        elif token[0] == "sep" and token[1] == "{":
                            d = self.mergeFunctionComment(self.name,
                                    ((type, None), self.signature), static)
                            self.index_add(self.name, self.filename, static,
                                           "function", d)
                            token = self.token()
                            token = self.parseBlock(token);
                    elif token[1] == ',':
                        self.comment = None
                        self.index_add(self.name, self.filename, static,
                                       "variable", type)
                        type = type_orig
                        token = self.token()
                        while token != None and token[0] == "sep":
                            type = type + token[1]
                            token = self.token()
                        if token != None and token[0] == "name":
                            self.name = token[1]
                            token = self.token()
                    else:
                        break

        return token

    def parse(self):
        self.warning("Parsing %s" % (self.filename))
        token = self.token()
        while token != None:
            if token[0] == 'name':
                token = self.parseGlobal(token)
            else:
                self.error("token %s %s unexpected at the top level" % (
                    token[0], token[1]))
                token = self.parseGlobal(token)
                return
        self.parseTopComment(self.top_comment)
        return self.index


class docBuilder:
    """A documentation builder"""
    def __init__(self, name, directories=['.'], excludes=[]):
        self.name = name
        self.directories = directories
        self.excludes = excludes + ignored_files.keys()
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def indexString(self, id, str):
        if str == None:
            return
        str = string.replace(str, "'", ' ')
        str = string.replace(str, '"', ' ')
        str = string.replace(str, "/", ' ')
        str = string.replace(str, '*', ' ')
        str = string.replace(str, "[", ' ')
        str = string.replace(str, "]", ' ')
        str = string.replace(str, "(", ' ')
        str = string.replace(str, ")", ' ')
        str = string.replace(str, "<", ' ')
        str = string.replace(str, '>', ' ')
        str = string.replace(str, "&", ' ')
        str = string.replace(str, '#', ' ')
        str = string.replace(str, ",", ' ')
        str = string.replace(str, '.', ' ')
        str = string.replace(str, ';', ' ')
        tokens = string.split(str)
        for token in tokens:
            try:
                c = token[0]
                if string.find(string.letters, c) < 0:
                    pass
                elif len(token) < 3:
                    pass
                else:
                    lower = string.lower(token)
                    # TODO: generalize this a bit
                    if lower == 'and' or lower == 'the':
                        pass
                    elif self.xref.has_key(token):
                        self.xref[token].append(id)
                    else:
                        self.xref[token] = [id]
            except:
                pass

    def analyze(self):
        print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
        self.idx.analyze()

    def scanHeaders(self):
        for header in self.headers.keys():
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx;
            self.idx.merge(idx)

    def scanModules(self):
        for module in self.modules.keys():
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def scan(self):
        for directory in self.directories:
            files = glob.glob(directory + "/*.c")
            for file in files:
                skip = 0
                for excl in self.excludes:
                    if string.find(file, excl) != -1:
                        skip = 1;
                        break
                if skip == 0:
                    self.modules[file] = None;
            files = glob.glob(directory + "/*.h")
            for file in files:
                skip = 0
                for excl in self.excludes:
                    if string.find(file, excl) != -1:
                        skip = 1;
                        break
                if skip == 0:
                    self.headers[file] = None;
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        elif module[-2:] == '.c':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        id = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (name,
                     self.modulename_file(id.header)))
        if id.info != None:
            info = id.info
            if info[0] != None and info[0] != '':
                try:
                    val = eval(info[0])
                except:
                    val = info[0]
                output.write(" value='%s'" % (val));
            if info[2] != None and info[2] != '':
                output.write(" type='%s'" % info[2]);
            if info[1] != None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]));
        output.write("/>\n")

    def serialize_macro(self, output, name):
        id = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(id.header)))
        if id.info != None:
            try:
                (args, desc) = id.info
                if desc != None and desc != "":
                    output.write("      <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for arg in args:
                    (name, desc) = arg
                    if desc != None and desc != "":
                        output.write("      <arg name='%s' info='%s'/>\n" % (
                                     name, escape(desc)))
                        self.indexString(name, desc)
                    else:
                        output.write("      <arg name='%s'/>\n" % (name))
            except:
                pass
        output.write("    </macro>\n")

    def serialize_typedef(self, output, name):
        id = self.idx.typedefs[name]
        if name == 'xmlChar':
            print id
        if id.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(id.header), id.info))
            name = id.info[7:]
            if self.idx.structs.has_key(name) and ( \
               type(self.idx.structs[name].info) == type(()) or
               type(self.idx.structs[name].info) == type([])):
                output.write(">\n");
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        self.indexString(name, desc)
                        if desc == None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except:
                    print "Failed to serialize struct %s" % (name)
                output.write("    </struct>\n")
            else:
                output.write("/>\n");
        else :
            output.write("    <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(id.header), id.info))
            try:
                desc = id.extra
                if desc != None and desc != "":
                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
                    output.write("    </typedef>\n")
                else:
                    output.write("/>\n")
            except:
                output.write("/>\n")

    def serialize_variable(self, output, name):
        id = self.idx.variables[name]
        if id.info != None:
            output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(id.header), id.info))
        else:
            output.write("    <variable name='%s' file='%s'/>\n" % (
                         name, self.modulename_file(id.header)))

    def serialize_function(self, output, name):
        id = self.idx.functions[name]
        if name == debugsym:
            print "=>", id

        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
                     name, self.modulename_file(id.header),
                     self.modulename_file(id.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if id.conditionals != None:
            apstr = ""
            for cond in id.conditionals:
                if apstr != "":
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write("      <cond>%s</cond>\n" % (apstr));
        try:
            (ret, params, desc) = id.info
            output.write("      <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] != None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                                 ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] == None:
                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except:
            print "Failed to save function %s info: " % name, `id.info`
        output.write("    </%s>\n" % (id.type))

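    #
    # Illustrative example (not generated from real data): for a public
    # function, serialize_function() above emits an element of roughly this
    # shape into <name>-api.xml; xmlDoSomething and its arguments are made-up
    # placeholders:
    #
    #     <function name='xmlDoSomething' file='tree' module='tree'>
    #       <cond>defined(LIBXML_TREE_ENABLED)</cond>
    #       <info>Short description of what the function does.</info>
    #       <return type='int' info='0 in case of success, -1 in case of error'/>
    #       <arg name='ctxt' type='xmlParserCtxtPtr' info='an XML parser context'/>
    #     </function>
    #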
    def serialize_exports(self, output, file):
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        dict = self.headers[file]
        if dict.info != None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 string.lower(data),
                                 escape(dict.info[data]),
                                 string.lower(data)))
                except:
                    print "Header %s lacks a %s description" % (module, data)
            if dict.info.has_key('Description'):
                desc = dict.info['Description']
                if string.find(desc, "DEPRECATED") != -1:
                    output.write("     <deprecated/>\n")

        ids = dict.macros.keys()
        ids.sort()
        for id in uniq(ids):
            # Macros are sometimes used to masquerade as other types.
            if dict.functions.has_key(id):
                continue
            if dict.variables.has_key(id):
                continue
            if dict.typedefs.has_key(id):
                continue
            if dict.structs.has_key(id):
                continue
            if dict.enums.has_key(id):
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
        ids = dict.enums.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
        ids = dict.typedefs.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
        ids = dict.structs.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
        ids = dict.variables.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
        ids = dict.functions.keys()
        ids.sort()
        for id in uniq(ids):
            output.write("     <exports symbol='%s' type='function'/>\n" % (id))
        output.write("    </file>\n")

    def serialize_xrefs_files(self, output):
        headers = self.headers.keys()
        headers.sort()
        for file in headers:
            module = self.modulename_file(file)
            output.write("    <file name='%s'>\n" % (module))
            dict = self.headers[file]
            ids = uniq(dict.functions.keys() + dict.variables.keys() + \
                       dict.macros.keys() + dict.typedefs.keys() + \
                       dict.structs.keys() + dict.enums.keys())
            ids.sort()
            for id in ids:
                output.write("      <ref name='%s'/>\n" % (id))
            output.write("    </file>\n")
        pass

    def serialize_xrefs_functions(self, output):
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    if funcs.has_key(param[0]):
                        funcs[param[0]].append(name)
                    else:
                        funcs[param[0]] = [name]
            except:
                pass
        typ = funcs.keys()
        typ.sort()
        for type in typ:
            if type == '' or type == 'void' or type == "int" or \
               type == "char *" or type == "const char *" :
                continue
            output.write("    <type name='%s'>\n" % (type))
            ids = funcs[type]
            ids.sort()
            pid = ''    # not sure why we have dups, but get rid of them!
            for id in ids:
                if id != pid:
                    output.write("      <ref name='%s'/>\n" % (id))
                    pid = id
            output.write("    </type>\n")

    def serialize_xrefs_constructors(self, output):
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                if ret[0] == "void":
                    continue
                if funcs.has_key(ret[0]):
                    funcs[ret[0]].append(name)
                else:
                    funcs[ret[0]] = [name]
            except:
                pass
        typ = funcs.keys()
        typ.sort()
        for type in typ:
            if type == '' or type == 'void' or type == "int" or \
               type == "char *" or type == "const char *" :
                continue
            output.write("    <type name='%s'>\n" % (type))
            ids = funcs[type]
            ids.sort()
            for id in ids:
                output.write("      <ref name='%s'/>\n" % (id))
            output.write("    </type>\n")

    def serialize_xrefs_alpha(self, output):
        letter = None
        ids = self.idx.identifiers.keys()
        ids.sort()
        for id in ids:
            if id[0] != letter:
                if letter != None:
                    output.write("    </letter>\n")
                letter = id[0]
                output.write("    <letter name='%s'>\n" % (letter))
            output.write("      <ref name='%s'/>\n" % (id))
        if letter != None:
            output.write("    </letter>\n")

    def serialize_xrefs_references(self, output):
        typ = self.idx.identifiers.keys()
        typ.sort()
        for id in typ:
            idf = self.idx.identifiers[id]
            module = idf.header
            output.write("    <reference name='%s' href='%s'/>\n" % (id,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         id))

    def serialize_xrefs_index(self, output):
        index = self.xref
        typ = index.keys()
        typ.sort()
        letter = None
        count = 0
        chunk = 0
        chunks = []
        for id in typ:
            if len(index[id]) > 30:
                continue
            if id[0] != letter:
                if letter == None or count > 200:
                    if letter != None:
                        output.write("      </letter>\n")
                        output.write("    </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
                    output.write("    <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = id[0]
                    chunk = chunk + 1
                elif letter != None:
                    output.write("      </letter>\n")
                letter = id[0]
                output.write("      <letter name='%s'>\n" % (letter))
            output.write("        <word name='%s'>\n" % (id))
            tokens = index[id];
            tokens.sort()
            tok = None
            for token in tokens:
                if tok == token:
                    continue
                tok = token
                output.write("          <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write("        </word>\n")
        if letter != None:
            output.write("      </letter>\n")
            output.write("    </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk - 1), first_letter, letter])
            output.write("    <chunks>\n")
            for ch in chunks:
                output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
                             ch[0], ch[1], ch[2]))
            output.write("    </chunks>\n")

    def serialize_xrefs(self, output):
        output.write("  <references>\n")
        self.serialize_xrefs_references(output)
        output.write("  </references>\n")
        output.write("  <alpha>\n")
        self.serialize_xrefs_alpha(output)
        output.write("  </alpha>\n")
        output.write("  <constructors>\n")
        self.serialize_xrefs_constructors(output)
        output.write("  </constructors>\n")
        output.write("  <functions>\n")
        self.serialize_xrefs_functions(output)
        output.write("  </functions>\n")
        output.write("  <files>\n")
        self.serialize_xrefs_files(output)
        output.write("  </files>\n")
        output.write("  <index>\n")
        self.serialize_xrefs_index(output)
        output.write("  </index>\n")

    def serialize(self):
        filename = "%s-api.xml" % self.name
        print "Saving XML description %s" % (filename)
        output = open(filename, "w")
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write("  <files>\n")
        headers = self.headers.keys()
        headers.sort()
        for file in headers:
            self.serialize_exports(output, file)
        output.write("  </files>\n")
        output.write("  <symbols>\n")
        macros = self.idx.macros.keys()
        macros.sort()
        for macro in macros:
            self.serialize_macro(output, macro)
        enums = self.idx.enums.keys()
        enums.sort()
        for enum in enums:
            self.serialize_enum(output, enum)
        typedefs = self.idx.typedefs.keys()
        typedefs.sort()
        for typedef in typedefs:
            self.serialize_typedef(output, typedef)
        variables = self.idx.variables.keys()
        variables.sort()
        for variable in variables:
            self.serialize_variable(output, variable)
        functions = self.idx.functions.keys()
        functions.sort()
        for function in functions:
            self.serialize_function(output, function)
        output.write("  </symbols>\n")
        output.write("</api>\n")
        output.close()

        filename = "%s-refs.xml" % self.name
        print "Saving XML Cross References %s" % (filename)
        output = open(filename, "w")
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")
        output.close()


def rebuild():
    builder = None
    if glob.glob("parser.c") != [] :
        print "Rebuilding API description for libxml2"
        builder = docBuilder("libxml2", [".", "."],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../parser.c") != [] :
        print "Rebuilding API description for libxml2"
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c") != [] :
        print "Rebuilding API description for libxslt"
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print "rebuild() failed, unable to guess the module"
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c") != [] :
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder

#
# for debugging the parser
#
def parse(filename):
    parser = CParser(filename)
    idx = parser.parse()
    return idx

if __name__ == "__main__":
    if len(sys.argv) > 1:
        debug = 1
        parse(sys.argv[1])
    else:
        rebuild()
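#
# Usage sketch (added for clarity, not part of the original script): run the
# script with no argument from a directory where rebuild() can find the
# sources (it guesses libxml2 or libxslt from the files it sees), or pass a
# single C file to parse it in debug mode:
#
#     python apibuild.py            # rebuild <name>-api.xml and <name>-refs.xml
#     python apibuild.py parser.c   # debug the parser on one file
#
# The same machinery can also be driven by hand; the directory list below is
# an example only:
#
#     builder = docBuilder("libxml2", ["..", "../include/libxml"], [])
#     builder.scan()
#     builder.analyze()
#     builder.serialize()
#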