apibuild.py revision 99dd7636a44208a14bc71614120f8ef4b8123016
#!/usr/bin/python -u
#
# This is the API builder, it parses the C sources and build the
# API formal description in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#
import os, sys
import string
import glob

debug=0
#debugsym='ignorableWhitespaceSAXFunc'
debugsym=None

#
# C parser analysis code
#
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "rngparser.c": "not yet integrated",
  "rngparser.h": "not yet integrated",
  "elfgcchack.h": "not a normal header",
  "testapi.c": "generated regression tests",
  "tst.c": "not part of the library",
  "testdso.c": "test for dynamic shared libraries",
}

ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
}

def escape(raw):
    raw = string.replace(raw, '&', '&amp;')
    raw = string.replace(raw, '<', '&lt;')
    raw = string.replace(raw, '>', '&gt;')
    raw = string.replace(raw, "'", '&apos;')
    raw = string.replace(raw, '"', '&quot;')
    return raw

def uniq(items):
    d = {}
    for item in items:
        d[item]=1
    return d.keys()

class identifier:
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        if conditionals == None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]
        if self.name == debugsym:
            print "=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module != None:
            r = r + " from %s" % (self.module)
        if self.info != None:
            r = r + " " + `self.info`
        if self.extra != None:
            r = r + " " + `self.extra`
        if self.conditionals != None:
            r = r + " " + `self.conditionals`
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        if conditionals == None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        return self.module
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        if self.name == debugsym:
            print "=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals))
        if header != None and self.header == None:
            self.set_header(module)
        if module != None and (self.module == None or self.header == self.module):
            self.set_module(module)
        if type != None and self.type == None:
            self.set_type(type)
        if info != None:
            self.set_info(info)
        if extra != None:
            self.set_extra(extra)
        if conditionals != None:
            self.set_conditionals(conditionals)

class index:
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        if name[0:2] == '__':
            return None
        d = None
        try:
            d = self.identifiers[name]
            d.update(header, module, type, info, extra, conditionals)
        except:
            d = identifier(name, header, module, type, lineno, info, extra, conditionals)
            self.identifiers[name] = d

        if d != None and static == 1:
            d.set_static(1)

        if d != None and name != None and type != None:
            self.references[name] = d

        if name == debugsym:
            print "New ref: %s" % (d)

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        if name[0:2] == '__':
            return None
        d = None
        try:
            d = self.identifiers[name]
            d.update(header, module, type, info, extra, conditionals)
        except:
            d = identifier(name, header, module, type, lineno, info, extra, conditionals)
            self.identifiers[name] = d

        if d != None and static == 1:
            d.set_static(1)

        if d != None and name != None and type != None:
            if type == "function":
                self.functions[name] = d
            elif type == "functype":
                self.functions[name] = d
            elif type == "variable":
                self.variables[name] = d
            elif type == "include":
                self.includes[name] = d
            elif type == "struct":
                self.structs[name] = d
            elif type == "enum":
                self.enums[name] = d
            elif type == "typedef":
                self.typedefs[name] = d
            elif type == "macro":
                self.macros[name] = d
            else:
                print "Unable to register type ", type

        if name == debugsym:
            print "New symbol: %s" % (d)

        return d

    def merge(self, idx):
        for id in idx.functions.keys():
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if self.macros.has_key(id):
                del self.macros[id]
            if self.functions.has_key(id):
                print "function %s from %s redeclared in %s" % (
                    id, self.functions[id].header, idx.functions[id].header)
            else:
                self.functions[id] = idx.functions[id]
                self.identifiers[id] = idx.functions[id]
        for id in idx.variables.keys():
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if self.macros.has_key(id):
                del self.macros[id]
            if self.variables.has_key(id):
                print "variable %s from %s redeclared in %s" % (
                    id, self.variables[id].header, idx.variables[id].header)
            else:
                self.variables[id] = idx.variables[id]
                self.identifiers[id] = idx.variables[id]
        for id in idx.structs.keys():
            if self.structs.has_key(id):
                print "struct %s from %s redeclared in %s" % (
                    id, self.structs[id].header, idx.structs[id].header)
            else:
                self.structs[id] = idx.structs[id]
                self.identifiers[id] = idx.structs[id]
        for id in idx.typedefs.keys():
            if self.typedefs.has_key(id):
                print "typedef %s from %s redeclared in %s" % (
                    id, self.typedefs[id].header, idx.typedefs[id].header)
            else:
                self.typedefs[id] = idx.typedefs[id]
                self.identifiers[id] = idx.typedefs[id]
        for id in idx.macros.keys():
            #
            # macro might be used to override functions or variables
            # definitions
            #
            if self.variables.has_key(id):
                continue
            if self.functions.has_key(id):
                continue
            if self.enums.has_key(id):
                continue
            if self.macros.has_key(id):
                print "macro %s from %s redeclared in %s" % (
                    id, self.macros[id].header, idx.macros[id].header)
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        for id in idx.enums.keys():
            if self.enums.has_key(id):
                print "enum %s from %s redeclared in %s" % (
                    id, self.enums[id].header, idx.enums[id].header)
            else:
                self.enums[id] = idx.enums[id]
                self.identifiers[id] = idx.enums[id]

    def merge_public(self, idx):
        for id in idx.functions.keys():
            if self.functions.has_key(id):
                # check that function condition agrees with header
                if idx.functions[id].conditionals != \
                   self.functions[id].conditionals:
                    print "Header condition differs from Function for %s:" \
                       % id
                    print " H: %s" % self.functions[id].conditionals
                    print " C: %s" % idx.functions[id].conditionals
                up = idx.functions[id]
                self.functions[id].update(None, up.module, up.type, up.info, up.extra)
        #     else:
        #         print "Function %s from %s is not declared in headers" % (
        #               id, idx.functions[id].module)
        # TODO: do the same for variables.
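        #
        # Illustrative example (not taken from the sources): if a header
        # declares a function under "#ifdef LIBXML_PUSH_ENABLED" while the
        # C module guards its definition with a different ENABLED
        # conditional, the comparison above prints the H:/C: mismatch.
        #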

    def analyze_dict(self, type, dict):
        count = 0
        public = 0
        for name in dict.keys():
            id = dict[name]
            count = count + 1
            if id.static == 0:
                public = public + 1
        if count != public:
            print " %d %s , %d public" % (count, type, public)
        elif count != 0:
            print " %d public %s" % (count, type)


    def analyze(self):
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)

class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line"""
    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0

    def getline(self):
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = string.lstrip(line)
            line = string.rstrip(line)
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = string.lstrip(n)
                n = string.rstrip(n)
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        self.tokens.insert(0, token);

    def debug(self):
        print "Last token: ", self.last
        print "Token queue: ", self.tokens
        print "Line %d end: " % (self.lineno), self.line

    def token(self):
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line == None:
                return None

            if line[0] == '#':
                self.tokens = map((lambda x: ('preproc', x)),
                                  string.split(line))
                break;
            l = len(line)
            if line[0] == '"' or line[0] == "'":
                end = line[0]
                line = line[1:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == end:
                            self.line = line[i+1:]
                            line = line[:i]
                            l = i
                            found = 1
                            break
                        if line[i] == '\\':
                            i = i + 1
                        i = i + 1
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line == None:
                            return None
                self.last = ('string', tok)
                return self.last

            if l >= 2 and line[0] == '/' and line[1] == '*':
                line = line[2:]
                found = 0
                tok = ""
                while found == 0:
                    i = 0
                    l = len(line)
                    while i < l:
                        if line[i] == '*' and i+1 < l and line[i+1] == '/':
                            self.line = line[i+2:]
                            line = line[:i-1]
                            l = i
                            found = 1
                            break
                        i = i + 1
                    if tok != "":
                        tok = tok + "\n"
                    tok = tok + line
                    if found == 0:
                        line = self.getline()
                        if line == None:
                            return None
                self.last = ('comment', tok)
                return self.last
            if l >= 2 and line[0] == '/' and line[1] == '/':
                line = line[2:]
                self.last = ('comment', line)
                return self.last
            i = 0
            while i < l:
                if line[i] == '/' and i+1 < l and line[i+1] == '/':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '/' and i+1 < l and line[i+1] == '*':
                    self.line = line[i:]
                    line = line[:i]
                    break
                if line[i] == '"' or line[i] == "'":
                    self.line = line[i:]
                    line = line[:i]
                    break
                i = i + 1
            l = len(line)
            i = 0
            while i < l:
                if line[i] == ' ' or line[i] == '\t':
                    i = i + 1
                    continue
                o = ord(line[i])
                if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                   (o >= 48 and o <= 57):
                    s = i
                    while i < l:
                        o = ord(line[i])
                        if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                           (o >= 48 and o <= 57) or string.find(
                           " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
                            i = i + 1
                        else:
                            break
                    self.tokens.append(('name', line[s:i]))
                    continue
                if string.find("(){}:;,[]", line[i]) != -1:
#                    if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
#                       line[i] == '}' or line[i] == ':' or line[i] == ';' or \
#                       line[i] == ',' or line[i] == '[' or line[i] == ']':
                    self.tokens.append(('sep', line[i]))
                    i = i + 1
                    continue
                if string.find("+-*><=/%&!|.", line[i]) != -1:
#                    if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
#                       line[i] == '>' or line[i] == '<' or line[i] == '=' or \
#                       line[i] == '/' or line[i] == '%' or line[i] == '&' or \
#                       line[i] == '!' or line[i] == '|' or line[i] == '.':
                    if line[i] == '.' and i + 2 < l and \
                       line[i+1] == '.' and line[i+2] == '.':
                        self.tokens.append(('name', '...'))
                        i = i + 3
                        continue

                    j = i + 1
                    if j < l and (
                       string.find("+-*><=/%&!|", line[j]) != -1):
#                       line[j] == '+' or line[j] == '-' or line[j] == '*' or \
#                       line[j] == '>' or line[j] == '<' or line[j] == '=' or \
#                       line[j] == '/' or line[j] == '%' or line[j] == '&' or \
#                       line[j] == '!' or line[j] == '|'):
                        self.tokens.append(('op', line[i:j+1]))
                        i = j + 1
                    else:
                        self.tokens.append(('op', line[i]))
                        i = i + 1
                    continue
                s = i
                while i < l:
                    o = ord(line[i])
                    if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
                       (o >= 48 and o <= 57) or (
                       string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
#                       line[i] != ' ' and line[i] != '\t' and
#                       line[i] != '(' and line[i] != ')' and
#                       line[i] != '{' and line[i] != '}' and
#                       line[i] != ':' and line[i] != ';' and
#                       line[i] != ',' and line[i] != '+' and
#                       line[i] != '-' and line[i] != '*' and
#                       line[i] != '/' and line[i] != '%' and
#                       line[i] != '&' and line[i] != '!' and
#                       line[i] != '|' and line[i] != '[' and
#                       line[i] != ']' and line[i] != '=' and
#                       line[i] != '*' and line[i] != '>' and
#                       line[i] != '<'):
                        i = i + 1
                    else:
                        break
                self.tokens.append(('name', line[s:i]))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok

class CParser:
    """The C module parser"""
    def __init__(self, filename, idx = None):
        self.filename = filename
        if len(filename) > 2 and filename[-2:] == '.h':
            self.is_header = 1
        else:
            self.is_header = 0
        self.input = open(filename)
        self.lexer = CLexer(self.input)
        if idx == None:
            self.index = index()
        else:
            self.index = idx
        self.top_comment = ""
        self.last_comment = ""
        self.comment = None
        self.collect_ref = 0
        self.no_error = 0
        self.conditionals = []
        self.defines = []

    def collect_references(self):
        self.collect_ref = 1

    def stop_error(self):
        self.no_error = 1

    def start_error(self):
        self.no_error = 0

    def lineno(self):
        return self.lexer.getlineno()

    def index_add(self, name, module, static, type, info=None, extra = None):
        if self.is_header == 1:
            self.index.add(name, module, module, static, type, self.lineno(),
                           info, extra, self.conditionals)
        else:
            self.index.add(name, None, module, static, type, self.lineno(),
                           info, extra, self.conditionals)

    def index_add_ref(self, name, module, static, type, info=None,
                      extra = None):
        if self.is_header == 1:
            self.index.add_ref(name, module, module, static, type,
                               self.lineno(), info, extra, self.conditionals)
        else:
            self.index.add_ref(name, None, module, static, type, self.lineno(),
                               info, extra, self.conditionals)

    def warning(self, msg):
        if self.no_error:
            return
        print msg

    def error(self, msg, token=-1):
        if self.no_error:
            return

        print "Parse Error: " + msg
        if token != -1:
            print "Got token ", token
        self.lexer.debug()
        sys.exit(1)

    def debug(self, msg, token=-1):
        print "Debug: " + msg
        if token != -1:
            print "Got token ", token
        self.lexer.debug()

    def parseTopComment(self, comment):
        res = {}
        lines = string.split(comment, "\n")
        item = None
        for line in lines:
            while line != "" and (line[0] == ' ' or line[0] == '\t'):
                line = line[1:]
            while line != "" and line[0] == '*':
                line = line[1:]
            while line != "" and (line[0] == ' ' or line[0] == '\t'):
                line = line[1:]
            try:
                (it, line) = string.split(line, ":", 1)
                item = it
                while line != "" and (line[0] == ' ' or line[0] == '\t'):
                    line = line[1:]
                if res.has_key(item):
                    res[item] = res[item] + " " + line
                else:
                    res[item] = line
            except:
                if item != None:
                    if res.has_key(item):
                        res[item] = res[item] + " " + line
                    else:
                        res[item] = line
        self.index.info = res

    def parseComment(self, token):
        if self.top_comment == "":
            self.top_comment = token[1]
        if self.comment == None or token[1][0] == '*':
            self.comment = token[1];
        else:
            self.comment = self.comment + token[1]
        token = self.lexer.token()

        if string.find(self.comment, "DOC_DISABLE") != -1:
            self.stop_error()

        if string.find(self.comment, "DOC_ENABLE") != -1:
            self.start_error()

        return token

    #
    # Parse a comment block associated with a macro
    #
    def parseMacroComment(self, name, quiet = 0):
        if name[0:2] == '__':
            quiet = 1

        args = []
        desc = ""

        if self.comment == None:
            if not quiet:
                self.warning("Missing comment for macro %s" % (name))
            return((args, desc))
        if self.comment[0] != '*':
            if not quiet:
                self.warning("Missing * in macro comment for %s" % (name))
            return((args, desc))
        lines = string.split(self.comment, '\n')
        if lines[0] == '*':
            del lines[0]
        if lines[0] != "* %s:" % (name):
            if not quiet:
                self.warning("Misformatted macro comment for %s" % (name))
                self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
            return((args, desc))
        del lines[0]
        while lines[0] == '*':
            del lines[0]
        while len(lines) > 0 and lines[0][0:3] == '* @':
            l = lines[0][3:]
            try:
                (arg, desc) = string.split(l, ':', 1)
                desc=string.strip(desc)
                arg=string.strip(arg)
            except:
                if not quiet:
                    self.warning("Misformatted macro comment for %s" % (name))
                    self.warning(" problem with '%s'" % (lines[0]))
                del lines[0]
                continue
            del lines[0]
            l = string.strip(lines[0])
            while len(l) > 2 and l[0:3] != '* @':
                while l[0] == '*':
                    l = l[1:]
                desc = desc + ' ' + string.strip(l)
                del lines[0]
                if len(lines) == 0:
                    break
                l = lines[0]
            args.append((arg, desc))
            while len(lines) > 0 and lines[0] == '*':
                del lines[0]
        desc = ""
        while len(lines) > 0:
            l = lines[0]
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            l = string.strip(l)
            desc = desc + " " + l
            del lines[0]

        desc = string.strip(desc)

        if quiet == 0:
            if desc == "":
                self.warning("Macro comment for %s lack description of the macro" % (name))

        return((args, desc))

    #
    # Parse a comment block and merge the information found in the
    # parameters descriptions, finally returns a block as complete
    # as possible
    #
    def mergeFunctionComment(self, name, description, quiet = 0):
        if name == 'main':
            quiet = 1
        if name[0:2] == '__':
            quiet = 1

        (ret, args) = description
        desc = ""
        retdesc = ""

        if self.comment == None:
            if not quiet:
                self.warning("Missing comment for function %s" % (name))
            return(((ret[0], retdesc), args, desc))
        if self.comment[0] != '*':
            if not quiet:
                self.warning("Missing * in function comment for %s" % (name))
            return(((ret[0], retdesc), args, desc))
        lines = string.split(self.comment, '\n')
        if lines[0] == '*':
            del lines[0]
        if lines[0] != "* %s:" % (name):
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
            return(((ret[0], retdesc), args, desc))
        del lines[0]
        while lines[0] == '*':
            del lines[0]
        nbargs = len(args)
        while len(lines) > 0 and lines[0][0:3] == '* @':
            l = lines[0][3:]
            try:
                (arg, desc) = string.split(l, ':', 1)
                desc=string.strip(desc)
                arg=string.strip(arg)
            except:
                if not quiet:
                    self.warning("Misformatted function comment for %s" % (name))
                    self.warning(" problem with '%s'" % (lines[0]))
                del lines[0]
                continue
            del lines[0]
            l = string.strip(lines[0])
            while len(l) > 2 and l[0:3] != '* @':
                while l[0] == '*':
                    l = l[1:]
                desc = desc + ' ' + string.strip(l)
                del lines[0]
                if len(lines) == 0:
                    break
                l = lines[0]
            i = 0
            while i < nbargs:
                if args[i][1] == arg:
                    args[i] = (args[i][0], arg, desc)
                    break;
                i = i + 1
            if i >= nbargs:
                if not quiet:
                    self.warning("Unable to find arg %s from function comment for %s" % (
                       arg, name))
            while len(lines) > 0 and lines[0] == '*':
                del lines[0]
        desc = ""
        while len(lines) > 0:
            l = lines[0]
            while len(l) > 0 and l[0] == '*':
                l = l[1:]
            l = string.strip(l)
            if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
                try:
                    l = string.split(l, ' ', 1)[1]
                except:
                    l = ""
                retdesc = string.strip(l)
                del lines[0]
                while len(lines) > 0:
                    l = lines[0]
                    while len(l) > 0 and l[0] == '*':
                        l = l[1:]
                    l = string.strip(l)
                    retdesc = retdesc + " " + l
                    del lines[0]
            else:
                desc = desc + " " + l
                del lines[0]

        retdesc = string.strip(retdesc)
        desc = string.strip(desc)

        if quiet == 0:
            #
            # report missing comments
            #
            i = 0
            while i < nbargs:
                if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
                i = i + 1
            if retdesc == "" and ret[0] != "void":
                self.warning("Function comment for %s lacks description of return value" % (name))
            if desc == "":
                self.warning("Function comment for %s lacks description of the function" % (name))


        return(((ret[0], retdesc), args, desc))

    def parsePreproc(self, token):
        if debug:
            print "=> preproc ", token, self.lexer.tokens
        name = token[1]
        if name == "#include":
            token = self.lexer.token()
            if token == None:
                return None
            if token[0] == 'preproc':
                self.index_add(token[1], self.filename, not self.is_header,
                               "include")
                return self.lexer.token()
            return token
        if name == "#define":
            token = self.lexer.token()
            if token == None:
                return None
            if token[0] == 'preproc':
                # TODO macros with arguments
                name = token[1]
                lst = []
                token = self.lexer.token()
                while token != None and token[0] == 'preproc' and \
                      token[1][0] != '#':
                    lst.append(token[1])
                    token = self.lexer.token()
                try:
                    name = string.split(name, '(') [0]
                except:
                    pass
                info = self.parseMacroComment(name, not self.is_header)
                self.index_add(name, self.filename, not self.is_header,
                               "macro", info)
                return token

        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
        # #if, #else and #endif) for headers and mainline code,
        # store the ones from the header in libxml2-api.xml, and later
        # (in the routine merge_public) verify that the two (header and
        # mainline code) agree.
        #
        # There is a small problem with processing the headers. Some of
        # the variables are not concerned with enabling / disabling of
        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
        # them to be included in libxml2-api.xml, or involved in
        # the check between the header and the mainline code. To
        # accomplish this, we ignore any conditional which doesn't include
        # the string 'ENABLED'
        #
        if name == "#ifdef":
            apstr = self.lexer.tokens[0][1]
            try:
                self.defines.append(apstr)
                if string.find(apstr, 'ENABLED') != -1:
                    self.conditionals.append("defined(%s)" % apstr)
            except:
                pass
        elif name == "#ifndef":
            apstr = self.lexer.tokens[0][1]
            try:
                self.defines.append(apstr)
                if string.find(apstr, 'ENABLED') != -1:
                    self.conditionals.append("!defined(%s)" % apstr)
            except:
                pass
        elif name == "#if":
            apstr = ""
            for tok in self.lexer.tokens:
                if apstr != "":
                    apstr = apstr + " "
                apstr = apstr + tok[1]
            try:
                self.defines.append(apstr)
                if string.find(apstr, 'ENABLED') != -1:
                    self.conditionals.append(apstr)
            except:
                pass
        elif name == "#else":
            if self.conditionals != [] and \
               string.find(self.defines[-1], 'ENABLED') != -1:
                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
        elif name == "#endif":
            if self.conditionals != [] and \
               string.find(self.defines[-1], 'ENABLED') != -1:
                self.conditionals = self.conditionals[:-1]
            self.defines = self.defines[:-1]
        token = self.lexer.token()
        while token != None and token[0] == 'preproc' and \
              token[1][0] != '#':
            token = self.lexer.token()
        return token

    #
    # token acquisition on top of the lexer, it handles internally
    # preprocessor and comments since they are logically not part of
    # the program structure.
    #
    def token(self):
        global ignored_words

        token = self.lexer.token()
        while token != None:
            if token[0] == 'comment':
                token = self.parseComment(token)
                continue
            elif token[0] == 'preproc':
                token = self.parsePreproc(token)
                continue
            elif token[0] == "name" and token[1] == "__const":
                token = ("name", "const")
                return token
            elif token[0] == "name" and token[1] == "__attribute":
                token = self.lexer.token()
                while token != None and token[1] != ";":
                    token = self.lexer.token()
                return token
            elif token[0] == "name" and ignored_words.has_key(token[1]):
                (n, info) = ignored_words[token[1]]
                i = 0
                while i < n:
                    token = self.lexer.token()
                    i = i + 1
                token = self.lexer.token()
                continue
            else:
                if debug:
                    print "=> ", token
                return token
        return None

    #
    # Parse a typedef, it records the type and its name.
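    # For example (illustrative), "typedef xmlNode *xmlNodePtr;" is recorded
    # as a "typedef", while a function-pointer typedef such as
    # "typedef void (*xmlFreeFunc)(void *mem);" is recorded as a "functype".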
    #
    def parseTypedef(self, token):
        if token == None:
            return None
        token = self.parseType(token)
        if token == None:
            self.error("parsing typedef")
            return None
        base_type = self.type
        type = base_type
        #self.debug("end typedef type", token)
        while token != None:
            if token[0] == "name":
                name = token[1]
                signature = self.signature
                if signature != None:
                    type = string.split(type, '(')[0]
                    d = self.mergeFunctionComment(name,
                            ((type, None), signature), 1)
                    self.index_add(name, self.filename, not self.is_header,
                                   "functype", d)
                else:
                    if base_type == "struct":
                        self.index_add(name, self.filename, not self.is_header,
                                       "struct", type)
                        base_type = "struct " + name
                    else:
                        self.index_add(name, self.filename, not self.is_header,
                                       "typedef", type)
                token = self.token()
            else:
                self.error("parsing typedef: expecting a name")
                return token
            #self.debug("end typedef", token)
            if token != None and token[0] == 'sep' and token[1] == ',':
                type = base_type
                token = self.token()
                while token != None and token[0] == "op":
                    type = type + token[1]
                    token = self.token()
            elif token != None and token[0] == 'sep' and token[1] == ';':
                break;
            elif token != None and token[0] == 'name':
                type = base_type
                continue;
            else:
                self.error("parsing typedef: expecting ';'", token)
                return token
        token = self.token()
        return token

    #
    # Parse a C code block, used for functions; it parses till
    # the balancing } included
    #
    def parseBlock(self, token):
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.comment = None
                token = self.token()
                return token
            else:
                if self.collect_ref == 1:
                    oldtok = token
                    token = self.token()
                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
                        if token[0] == "sep" and token[1] == "(":
                            self.index_add_ref(oldtok[1], self.filename,
                                               0, "function")
                            token = self.token()
                        elif token[0] == "name":
                            token = self.token()
                            if token[0] == "sep" and (token[1] == ";" or
                               token[1] == "," or token[1] == "="):
                                self.index_add_ref(oldtok[1], self.filename,
                                                   0, "type")
                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
                        self.index_add_ref(oldtok[1], self.filename,
                                           0, "typedef")
                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
                        self.index_add_ref(oldtok[1], self.filename,
                                           0, "typedef")

                else:
                    token = self.token()
        return token

    #
    # Parse a C struct definition till the balancing }
    #
    def parseStruct(self, token):
        fields = []
        #self.debug("start parseStruct", token)
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.struct_fields = fields
                #self.debug("end parseStruct", token)
                #print fields
                token = self.token()
                return token
            else:
                base_type = self.type
                #self.debug("before parseType", token)
                token = self.parseType(token)
                #self.debug("after parseType", token)
                if token != None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    if token[0] == "sep" and token[1] == ";":
                        self.comment = None
                        token = self.token()
                        fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseStruct: expecting ;", token)
                elif token != None and token[0] == "sep" and token[1] == "{":
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token != None and token[0] == "name":
                        token = self.token()
                    if token != None and token[0] == "sep" and token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseStruct: expecting ;", token)
                else:
                    self.error("parseStruct: name", token)
                    token = self.token()
                self.type = base_type;
        self.struct_fields = fields
        #self.debug("end parseStruct", token)
        #print fields
        return token

    #
    # Parse a C enum block, parse till the balancing }
    #
    def parseEnumBlock(self, token):
        self.enums = []
        name = None
        self.comment = None
        comment = ""
        value = "0"
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                if name != None:
                    if self.comment != None:
                        comment = self.comment
                        self.comment = None
                    self.enums.append((name, value, comment))
                token = self.token()
                return token
            elif token[0] == "name":
                if name != None:
                    if self.comment != None:
                        comment = string.strip(self.comment)
                        self.comment = None
                    self.enums.append((name, value, comment))
                name = token[1]
                comment = ""
                token = self.token()
                if token[0] == "op" and token[1][0] == "=":
                    value = ""
                    if len(token[1]) > 1:
                        value = token[1][1:]
                    token = self.token()
                    while token[0] != "sep" or (token[1] != ',' and
                          token[1] != '}'):
                        value = value + token[1]
                        token = self.token()
                else:
                    try:
                        value = "%d" % (int(value) + 1)
                    except:
                        self.warning("Failed to compute value of enum %s" % (name))
                        value=""
                if token[0] == "sep" and token[1] == ",":
                    token = self.token()
            else:
                token = self.token()
        return token

    #
    # Parse a C definition block, used for structs; it parses till
    # the balancing }
    #
    def parseTypeBlock(self, token):
        while token != None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                token = self.token()
                return token
            else:
                token = self.token()
        return token

    #
    # Parse a type: the fact that the type name can either occur after
    # the definition or within the definition makes it a little harder;
    # if inside, the name token is pushed back before returning
    #
    def parseType(self, token):
        self.type = ""
        self.struct_fields = []
        self.signature = None
        if token == None:
            return token

        while token[0] == "name" and (
              token[1] == "const" or token[1] == "unsigned"):
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            token = self.token()

        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
            if self.type == "":
                self.type = token[1]
            else:
                self.type = self.type + " " + token[1]
            if token[0] == "name" and token[1] == "int":
                if self.type == "":
                    self.type = token[1]
                else:
                    self.type = self.type + " " + token[1]

        elif token[0] == "name" and token[1] == "struct":
token[0] == "name" and token[1] == "struct": 1223 if self.type == "": 1224 self.type = token[1] 1225 else: 1226 self.type = self.type + " " + token[1] 1227 token = self.token() 1228 nametok = None 1229 if token[0] == "name": 1230 nametok = token 1231 token = self.token() 1232 if token != None and token[0] == "sep" and token[1] == "{": 1233 token = self.token() 1234 token = self.parseStruct(token) 1235 elif token != None and token[0] == "op" and token[1] == "*": 1236 self.type = self.type + " " + nametok[1] + " *" 1237 token = self.token() 1238 while token != None and token[0] == "op" and token[1] == "*": 1239 self.type = self.type + " *" 1240 token = self.token() 1241 if token[0] == "name": 1242 nametok = token 1243 token = self.token() 1244 else: 1245 self.error("struct : expecting name", token) 1246 return token 1247 elif token != None and token[0] == "name" and nametok != None: 1248 self.type = self.type + " " + nametok[1] 1249 return token 1250 1251 if nametok != None: 1252 self.lexer.push(token) 1253 token = nametok 1254 return token 1255 1256 elif token[0] == "name" and token[1] == "enum": 1257 if self.type == "": 1258 self.type = token[1] 1259 else: 1260 self.type = self.type + " " + token[1] 1261 self.enums = [] 1262 token = self.token() 1263 if token != None and token[0] == "sep" and token[1] == "{": 1264 token = self.token() 1265 token = self.parseEnumBlock(token) 1266 else: 1267 self.error("parsing enum: expecting '{'", token) 1268 enum_type = None 1269 if token != None and token[0] != "name": 1270 self.lexer.push(token) 1271 token = ("name", "enum") 1272 else: 1273 enum_type = token[1] 1274 for enum in self.enums: 1275 self.index_add(enum[0], self.filename, 1276 not self.is_header, "enum", 1277 (enum[1], enum[2], enum_type)) 1278 return token 1279 1280 elif token[0] == "name": 1281 if self.type == "": 1282 self.type = token[1] 1283 else: 1284 self.type = self.type + " " + token[1] 1285 else: 1286 self.error("parsing type %s: expecting a name" % (self.type), 1287 token) 1288 return token 1289 token = self.token() 1290 while token != None and (token[0] == "op" or 1291 token[0] == "name" and token[1] == "const"): 1292 self.type = self.type + " " + token[1] 1293 token = self.token() 1294 1295 # 1296 # if there is a parenthesis here, this means a function type 1297 # 1298 if token != None and token[0] == "sep" and token[1] == '(': 1299 self.type = self.type + token[1] 1300 token = self.token() 1301 while token != None and token[0] == "op" and token[1] == '*': 1302 self.type = self.type + token[1] 1303 token = self.token() 1304 if token == None or token[0] != "name" : 1305 self.error("parsing function type, name expected", token); 1306 return token 1307 self.type = self.type + token[1] 1308 nametok = token 1309 token = self.token() 1310 if token != None and token[0] == "sep" and token[1] == ')': 1311 self.type = self.type + token[1] 1312 token = self.token() 1313 if token != None and token[0] == "sep" and token[1] == '(': 1314 token = self.token() 1315 type = self.type; 1316 token = self.parseSignature(token); 1317 self.type = type; 1318 else: 1319 self.error("parsing function type, '(' expected", token); 1320 return token 1321 else: 1322 self.error("parsing function type, ')' expected", token); 1323 return token 1324 self.lexer.push(token) 1325 token = nametok 1326 return token 1327 1328 # 1329 # do some lookahead for arrays 1330 # 1331 if token != None and token[0] == "name": 1332 nametok = token 1333 token = self.token() 1334 if token != None and token[0] == "sep" and token[1] == 
                self.type = self.type + nametok[1]
                while token != None and token[0] == "sep" and token[1] == '[':
                    self.type = self.type + token[1]
                    token = self.token()
                    while token != None and token[0] != 'sep' and \
                          token[1] != ']' and token[1] != ';':
                        self.type = self.type + token[1]
                        token = self.token()
                    if token != None and token[0] == 'sep' and token[1] == ']':
                        self.type = self.type + token[1]
                        token = self.token()
                    else:
                        self.error("parsing array type, ']' expected", token);
                        return token
            elif token != None and token[0] == "sep" and token[1] == ':':
                # remove :12 in case it's a limited int size
                token = self.token()
                token = self.token()
            self.lexer.push(token)
            token = nametok

        return token

    #
    # Parse a signature: '(' has been parsed and we scan the type definition
    # up to the ')' included
    def parseSignature(self, token):
        signature = []
        if token != None and token[0] == "sep" and token[1] == ')':
            self.signature = []
            token = self.token()
            return token
        while token != None:
            token = self.parseType(token)
            if token != None and token[0] == "name":
                signature.append((self.type, token[1], None))
                token = self.token()
            elif token != None and token[0] == "sep" and token[1] == ',':
                token = self.token()
                continue
            elif token != None and token[0] == "sep" and token[1] == ')':
                # only the type was provided
                if self.type == "...":
                    signature.append((self.type, "...", None))
                else:
                    signature.append((self.type, None, None))
            if token != None and token[0] == "sep":
                if token[1] == ',':
                    token = self.token()
                    continue
                elif token[1] == ')':
                    token = self.token()
                    break
        self.signature = signature
        return token

    #
    # Parse a global definition, be it a type, variable or function
    # the extern "C" blocks are a bit nasty and require it to recurse.
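    # (e.g. an 'extern "C" {' wrapper, as found in the public headers, is
    #  handled by re-entering parseGlobal for each declaration it contains)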
    #
    def parseGlobal(self, token):
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token == None:
                return token
            if token[0] == 'string':
                if token[1] == 'C':
                    token = self.token()
                    if token == None:
                        return token
                    if token[0] == 'sep' and token[1] == "{":
                        token = self.token()
#                        print 'Entering extern "C line ', self.lineno()
                        while token != None and (token[0] != 'sep' or
                              token[1] != "}"):
                            if token[0] == 'name':
                                token = self.parseGlobal(token)
                            else:
                                self.error(
                                    "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                                token = self.parseGlobal(token)
#                        print 'Exiting extern "C" line', self.lineno()
                        token = self.token()
                        return token
                else:
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token == None or token[0] != 'name':
                return token

        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)
        else:
            token = self.parseType(token)
            type_orig = self.type
            if token == None or token[0] != "name":
                return token
            type = type_orig
            self.name = token[1]
            token = self.token()
            while token != None and (token[0] == "sep" or token[0] == "op"):
                if token[0] == "sep":
                    if token[1] == "[":
                        type = type + token[1]
                        token = self.token()
                        while token != None and (token[0] != "sep" or \
                              token[1] != ";"):
                            type = type + token[1]
                            token = self.token()

                if token != None and token[0] == "op" and token[1] == "=":
                    #
                    # Skip the initialization of the variable
                    #
                    token = self.token()
                    if token[0] == 'sep' and token[1] == '{':
                        token = self.token()
                        token = self.parseBlock(token)
                    else:
                        self.comment = None
                        while token != None and (token[0] != "sep" or \
                              (token[1] != ';' and token[1] != ',')):
                            token = self.token()
                    self.comment = None
                    if token == None or token[0] != "sep" or (token[1] != ';' and
                       token[1] != ','):
                        self.error("missing ';' or ',' after value")

                if token != None and token[0] == "sep":
                    if token[1] == ";":
                        self.comment = None
                        token = self.token()
                        if type == "struct":
                            self.index_add(self.name, self.filename,
                                           not self.is_header, "struct", self.struct_fields)
                        else:
                            self.index_add(self.name, self.filename,
                                           not self.is_header, "variable", type)
                        break
                    elif token[1] == "(":
                        token = self.token()
                        token = self.parseSignature(token)
                        if token == None:
                            return None
                        if token[0] == "sep" and token[1] == ";":
                            d = self.mergeFunctionComment(self.name,
                                    ((type, None), self.signature), 1)
                            self.index_add(self.name, self.filename, static,
                                           "function", d)
                            token = self.token()
                        elif token[0] == "sep" and token[1] == "{":
                            d = self.mergeFunctionComment(self.name,
                                    ((type, None), self.signature), static)
                            self.index_add(self.name, self.filename, static,
                                           "function", d)
                            token = self.token()
                            token = self.parseBlock(token);
                    elif token[1] == ',':
                        self.comment = None
                        self.index_add(self.name, self.filename, static,
                                       "variable", type)
                        type = type_orig
                        token = self.token()
                        while token != None and token[0] == "sep":
                            type = type + token[1]
                            token = self.token()
                        if token != None and token[0] == "name":
                            self.name = token[1]
                            token = self.token()
                    else:
                        break

        return token

    def parse(self):
        self.warning("Parsing %s" % (self.filename))
        token = self.token()
        while token != None:
            if token[0] == 'name':
                token = self.parseGlobal(token)
            else:
                self.error("token %s %s unexpected at the top level" % (
                    token[0], token[1]))
                token = self.parseGlobal(token)
                return
        self.parseTopComment(self.top_comment)
        return self.index


class docBuilder:
    """A documentation builder"""
    def __init__(self, name, directories=['.'], excludes=[]):
        self.name = name
        self.directories = directories
        self.excludes = excludes + ignored_files.keys()
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    def indexString(self, id, str):
        if str == None:
            return
        str = string.replace(str, "'", ' ')
        str = string.replace(str, '"', ' ')
        str = string.replace(str, "/", ' ')
        str = string.replace(str, '*', ' ')
        str = string.replace(str, "[", ' ')
        str = string.replace(str, "]", ' ')
        str = string.replace(str, "(", ' ')
        str = string.replace(str, ")", ' ')
        str = string.replace(str, "<", ' ')
        str = string.replace(str, '>', ' ')
        str = string.replace(str, "&", ' ')
        str = string.replace(str, '#', ' ')
        str = string.replace(str, ",", ' ')
        str = string.replace(str, '.', ' ')
        str = string.replace(str, ';', ' ')
        tokens = string.split(str)
        for token in tokens:
            try:
                c = token[0]
                if string.find(string.letters, c) < 0:
                    pass
                elif len(token) < 3:
                    pass
                else:
                    lower = string.lower(token)
                    # TODO: generalize this a bit
                    if lower == 'and' or lower == 'the':
                        pass
                    elif self.xref.has_key(token):
                        self.xref[token].append(id)
                    else:
                        self.xref[token] = [id]
            except:
                pass

    def analyze(self):
        print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
        self.idx.analyze()

    def scanHeaders(self):
        for header in self.headers.keys():
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx;
            self.idx.merge(idx)

    def scanModules(self):
        for module in self.modules.keys():
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def scan(self):
        for directory in self.directories:
            files = glob.glob(directory + "/*.c")
            for file in files:
                skip = 0
                for excl in self.excludes:
                    if string.find(file, excl) != -1:
                        skip = 1;
                        break
                if skip == 0:
                    self.modules[file] = None;
            files = glob.glob(directory + "/*.h")
            for file in files:
                skip = 0
                for excl in self.excludes:
                    if string.find(file, excl) != -1:
                        skip = 1;
                        break
                if skip == 0:
                    self.headers[file] = None;
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        elif module[-2:] == '.c':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        id = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (name,
                     self.modulename_file(id.header)))
        if id.info != None:
            info = id.info
            if info[0] != None and info[0] != '':
                try:
                    val = eval(info[0])
                except:
                    val = info[0]
                output.write(" value='%s'" % (val));
            if info[2] != None and info[2] != '':
                output.write(" type='%s'" % info[2]);
            if info[1] != None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]));
        output.write("/>\n")

    def serialize_macro(self, output, name):
        id = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(id.header)))
        if id.info != None:
            try:
                (args, desc) = id.info
                if desc != None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for arg in args:
                    (name, desc) = arg
                    if desc != None and desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                            name, escape(desc)))
                        self.indexString(name, desc)
                    else:
                        output.write(" <arg name='%s'/>\n" % (name))
            except:
                pass
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        id = self.idx.typedefs[name]
        if id.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                name, self.modulename_file(id.header), id.info))
            name = id.info[7:]
            if self.idx.structs.has_key(name) and ( \
               type(self.idx.structs[name].info) == type(()) or
               type(self.idx.structs[name].info) == type([])):
                output.write(">\n");
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        self.indexString(name, desc)
                        if desc == None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
                except:
                    print "Failed to serialize struct %s" % (name)
                output.write(" </struct>\n")
            else:
                output.write("/>\n");
        else :
            output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(id.header), id.info))

    def serialize_variable(self, output, name):
        id = self.idx.variables[name]
        if id.info != None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(id.header), id.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                name, self.modulename_file(id.header)))

    def serialize_function(self, output, name):
        id = self.idx.functions[name]
        if name == debugsym:
            print "=>", id

        output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type,
                     name, self.modulename_file(id.header),
                     self.modulename_file(id.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if id.conditionals != None:
            apstr = ""
            for cond in id.conditionals:
                if apstr != "":
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write(" <cond>%s</cond>\n"% (apstr));
        try:
            (ret, params, desc) = id.info
            output.write(" <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] != None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                        ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] == None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except:
            print "Failed to save function %s info: " % name, `id.info`
        output.write(" </%s>\n" % (id.type))

    def serialize_exports(self, output, file):
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        dict = self.headers[file]
        if dict.info != None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write(" <%s>%s</%s>\n" % (
                        string.lower(data),
                        escape(dict.info[data]),
                        string.lower(data)))
                except:
                    print "Header %s lacks a %s description" % (module, data)
            if dict.info.has_key('Description'):
                desc = dict.info['Description']
                if string.find(desc, "DEPRECATED") != -1:
                    output.write(" <deprecated/>\n")

        ids = dict.macros.keys()
        ids.sort()
        for id in uniq(ids):
            # Macros are sometimes used to masquerade other types.
            if dict.functions.has_key(id):
                continue
            if dict.variables.has_key(id):
                continue
            if dict.typedefs.has_key(id):
                continue
            if dict.structs.has_key(id):
                continue
            if dict.enums.has_key(id):
                continue
            output.write(" <exports symbol='%s' type='macro'/>\n" % (id))
        ids = dict.enums.keys()
        ids.sort()
        for id in uniq(ids):
            output.write(" <exports symbol='%s' type='enum'/>\n" % (id))
        ids = dict.typedefs.keys()
        ids.sort()
        for id in uniq(ids):
            output.write(" <exports symbol='%s' type='typedef'/>\n" % (id))
        ids = dict.structs.keys()
        ids.sort()
        for id in uniq(ids):
            output.write(" <exports symbol='%s' type='struct'/>\n" % (id))
        ids = dict.variables.keys()
        ids.sort()
        for id in uniq(ids):
            output.write(" <exports symbol='%s' type='variable'/>\n" % (id))
        ids = dict.functions.keys()
        ids.sort()
        for id in uniq(ids):
            output.write(" <exports symbol='%s' type='function'/>\n" % (id))
        output.write(" </file>\n")

    def serialize_xrefs_files(self, output):
        headers = self.headers.keys()
        headers.sort()
        for file in headers:
            module = self.modulename_file(file)
            output.write(" <file name='%s'>\n" % (module))
            dict = self.headers[file]
            ids = uniq(dict.functions.keys() + dict.variables.keys() + \
                       dict.macros.keys() + dict.typedefs.keys() + \
                       dict.structs.keys() + dict.enums.keys())
            ids.sort()
            for id in ids:
                output.write(" <ref name='%s'/>\n" % (id))
            output.write(" </file>\n")
        pass

    def serialize_xrefs_functions(self, output):
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    if funcs.has_key(param[0]):
                        funcs[param[0]].append(name)
                    else:
                        funcs[param[0]] = [name]
            except:
                pass
        typ = funcs.keys()
        typ.sort()
        for type in typ:
            if type == '' or type == 'void' or type == "int" or \
               type == "char *" or type == "const char *" :
                continue
            output.write(" <type name='%s'>\n" % (type))
            ids = funcs[type]
            ids.sort()
            pid = ''    # not sure why we have dups, but get rid of them!
            for id in ids:
                if id != pid:
                    output.write(" <ref name='%s'/>\n" % (id))
                    pid = id
            output.write(" </type>\n")

    def serialize_xrefs_constructors(self, output):
        funcs = {}
        for name in self.idx.functions.keys():
            id = self.idx.functions[name]
            try:
                (ret, params, desc) = id.info
                if ret[0] == "void":
                    continue
                if funcs.has_key(ret[0]):
                    funcs[ret[0]].append(name)
                else:
                    funcs[ret[0]] = [name]
            except:
                pass
        typ = funcs.keys()
        typ.sort()
        for type in typ:
            if type == '' or type == 'void' or type == "int" or \
               type == "char *" or type == "const char *" :
                continue
            output.write(" <type name='%s'>\n" % (type))
            ids = funcs[type]
            ids.sort()
            for id in ids:
                output.write(" <ref name='%s'/>\n" % (id))
            output.write(" </type>\n")

    def serialize_xrefs_alpha(self, output):
        letter = None
        ids = self.idx.identifiers.keys()
        ids.sort()
        for id in ids:
            if id[0] != letter:
                if letter != None:
                    output.write(" </letter>\n")
                letter = id[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <ref name='%s'/>\n" % (id))
        if letter != None:
            output.write(" </letter>\n")

    def serialize_xrefs_references(self, output):
        typ = self.idx.identifiers.keys()
        typ.sort()
        for id in typ:
            idf = self.idx.identifiers[id]
            module = idf.header
            output.write(" <reference name='%s' href='%s'/>\n" % (id,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         id))

    def serialize_xrefs_index(self, output):
        index = self.xref
        typ = index.keys()
        typ.sort()
        letter = None
        count = 0
        chunk = 0
        chunks = []
        for id in typ:
            if len(index[id]) > 30:
                continue
            if id[0] != letter:
                if letter == None or count > 200:
                    if letter != None:
                        output.write(" </letter>\n")
                        output.write(" </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
                    output.write(" <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = id[0]
                    chunk = chunk + 1
                elif letter != None:
                    output.write(" </letter>\n")
                letter = id[0]
                output.write(" <letter name='%s'>\n" % (letter))
            output.write(" <word name='%s'>\n" % (id))
            tokens = index[id];
            tokens.sort()
            tok = None
            for token in tokens:
                if tok == token:
                    continue
                tok = token
                output.write(" <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write(" </word>\n")
        if letter != None:
            output.write(" </letter>\n")
            output.write(" </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
        output.write(" <chunks>\n")
        for ch in chunks:
            output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
                ch[0], ch[1], ch[2]))
        output.write(" </chunks>\n")

    def serialize_xrefs(self, output):
        output.write(" <references>\n")
        self.serialize_xrefs_references(output)
        output.write(" </references>\n")
        output.write(" <alpha>\n")
        self.serialize_xrefs_alpha(output)
        output.write(" </alpha>\n")
        output.write(" <constructors>\n")
        self.serialize_xrefs_constructors(output)
        output.write(" </constructors>\n")
        output.write(" <functions>\n")
        self.serialize_xrefs_functions(output)
        output.write(" </functions>\n")
        output.write(" <files>\n")
        self.serialize_xrefs_files(output)
        output.write(" </files>\n")
        output.write(" <index>\n")
        self.serialize_xrefs_index(output)
        output.write(" </index>\n")

    def serialize(self):
        filename = "%s-api.xml" % self.name
        print "Saving XML description %s" % (filename)
        output = open(filename, "w")
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write(" <files>\n")
        headers = self.headers.keys()
        headers.sort()
        for file in headers:
            self.serialize_exports(output, file)
        output.write(" </files>\n")
        output.write(" <symbols>\n")
        macros = self.idx.macros.keys()
        macros.sort()
        for macro in macros:
            self.serialize_macro(output, macro)
        enums = self.idx.enums.keys()
        enums.sort()
        for enum in enums:
            self.serialize_enum(output, enum)
        typedefs = self.idx.typedefs.keys()
        typedefs.sort()
        for typedef in typedefs:
            self.serialize_typedef(output, typedef)
        variables = self.idx.variables.keys()
        variables.sort()
        for variable in variables:
            self.serialize_variable(output, variable)
        functions = self.idx.functions.keys()
        functions.sort()
        for function in functions:
            self.serialize_function(output, function)
        output.write(" </symbols>\n")
        output.write("</api>\n")
        output.close()

        filename = "%s-refs.xml" % self.name
        print "Saving XML Cross References %s" % (filename)
        output = open(filename, "w")
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")
        output.close()


def rebuild():
    builder = None
    if glob.glob("parser.c") != [] :
        print "Rebuilding API description for libxml2"
        builder = docBuilder("libxml2", [".", "."],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../parser.c") != [] :
        print "Rebuilding API description for libxml2"
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c") != [] :
        print "Rebuilding API description for libxslt"
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print "rebuild() failed, unable to guess the module"
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c") != [] :
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder

#
# for debugging the parser
#
def parse(filename):
    parser = CParser(filename)
    idx = parser.parse()
    return idx

if __name__ == "__main__":
    if len(sys.argv) > 1:
        debug = 1
        parse(sys.argv[1])
    else:
        rebuild()
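
#
# Usage sketch (illustrative): run from a directory where the module sources
# can be found, e.g. the libxml2 doc/ directory, to regenerate
# libxml2-api.xml and libxml2-refs.xml; or pass a single C file to parse it
# with debug output enabled:
#
#   python apibuild.py              # guess the module and rebuild its API files
#   python apibuild.py parser.c     # debug: parse one file and print its tokens
#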