1#!/usr/bin/python -u 2# 3# This is the API builder, it parses the C sources and build the 4# API formal description in XML. 5# 6# See Copyright for the status of this software. 7# 8# daniel@veillard.com 9# 10import os, sys 11import string 12import glob 13 14debug=0 15#debugsym='ignorableWhitespaceSAXFunc' 16debugsym=None 17 18# 19# C parser analysis code 20# 21ignored_files = { 22 "trio": "too many non standard macros", 23 "trio.c": "too many non standard macros", 24 "trionan.c": "too many non standard macros", 25 "triostr.c": "too many non standard macros", 26 "acconfig.h": "generated portability layer", 27 "config.h": "generated portability layer", 28 "libxml.h": "internal only", 29 "testOOM.c": "out of memory tester", 30 "testOOMlib.h": "out of memory tester", 31 "testOOMlib.c": "out of memory tester", 32 "rngparser.c": "not yet integrated", 33 "rngparser.h": "not yet integrated", 34 "elfgcchack.h": "not a normal header", 35 "testHTML.c": "test tool", 36 "testReader.c": "test tool", 37 "testSchemas.c": "test tool", 38 "testXPath.c": "test tool", 39 "testAutomata.c": "test tool", 40 "testModule.c": "test tool", 41 "testRegexp.c": "test tool", 42 "testThreads.c": "test tool", 43 "testC14N.c": "test tool", 44 "testRelax.c": "test tool", 45 "testThreadsWin32.c": "test tool", 46 "testSAX.c": "test tool", 47 "testURI.c": "test tool", 48 "testapi.c": "generated regression tests", 49 "runtest.c": "regression tests program", 50 "runsuite.c": "regression tests program", 51 "tst.c": "not part of the library", 52 "test.c": "not part of the library", 53 "testdso.c": "test for dynamid shared libraries", 54 "testrecurse.c": "test for entities recursions", 55 "xzlib.h": "Internal API only 2.8.0", 56 "buf.h": "Internal API only 2.9.0", 57 "enc.h": "Internal API only 2.9.0", 58 "/save.h": "Internal API only 2.9.0", 59 "timsort.h": "Internal header only for xpath.c 2.9.0", 60} 61 62ignored_words = { 63 "WINAPI": (0, "Windows keyword"), 64 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"), 65 "XMLPUBVAR": (0, "Special macro for extern vars for win32"), 66 "XSLTPUBVAR": (0, "Special macro for extern vars for win32"), 67 "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"), 68 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"), 69 "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 70 "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"), 71 "XMLCALL": (0, "Special macro for win32 calls"), 72 "XSLTCALL": (0, "Special macro for win32 calls"), 73 "XMLCDECL": (0, "Special macro for win32 calls"), 74 "EXSLTCALL": (0, "Special macro for win32 calls"), 75 "__declspec": (3, "Windows keyword"), 76 "__stdcall": (0, "Windows keyword"), 77 "ATTRIBUTE_UNUSED": (0, "macro keyword"), 78 "LIBEXSLT_PUBLIC": (0, "macro keyword"), 79 "X_IN_Y": (5, "macro function builder"), 80 "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"), 81 "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"), 82 "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"), 83 "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"), 84} 85 86def escape(raw): 87 raw = raw.replace('&', '&') 88 raw = raw.replace('<', '<') 89 raw = raw.replace('>', '>') 90 raw = raw.replace("'", ''') 91 raw = raw.replace('"', '"') 92 return raw 93 94def uniq(items): 95 d = {} 96 for item in items: 97 d[item]=1 98 return list(d.keys()) 99 100class identifier: 101 def __init__(self, name, header=None, module=None, type=None, lineno = 0, 102 info=None, extra=None, conditionals = None): 103 self.name = name 104 self.header = header 105 self.module = module 106 self.type = type 107 self.info = info 108 self.extra = extra 109 self.lineno = lineno 110 self.static = 0 111 if conditionals == None or len(conditionals) == 0: 112 self.conditionals = None 113 else: 114 self.conditionals = conditionals[:] 115 if self.name == debugsym: 116 print("=> define %s : %s" % (debugsym, (module, type, info, 117 extra, conditionals))) 118 119 def __repr__(self): 120 r = "%s %s:" % (self.type, self.name) 121 if self.static: 122 r = r + " static" 123 if self.module != None: 124 r = r + " from %s" % (self.module) 125 if self.info != None: 126 r = r + " " + repr(self.info) 127 if self.extra != None: 128 r = r + " " + repr(self.extra) 129 if self.conditionals != None: 130 r = r + " " + repr(self.conditionals) 131 return r 132 133 134 def set_header(self, header): 135 self.header = header 136 def set_module(self, module): 137 self.module = module 138 def set_type(self, type): 139 self.type = type 140 def set_info(self, info): 141 self.info = info 142 def set_extra(self, extra): 143 self.extra = extra 144 def set_lineno(self, lineno): 145 self.lineno = lineno 146 def set_static(self, static): 147 self.static = static 148 def set_conditionals(self, conditionals): 149 if conditionals == None or len(conditionals) == 0: 150 self.conditionals = None 151 else: 152 self.conditionals = conditionals[:] 153 154 def get_name(self): 155 return self.name 156 def get_header(self): 157 return self.module 158 def get_module(self): 159 return self.module 160 def get_type(self): 161 return self.type 162 def get_info(self): 163 return self.info 164 def get_lineno(self): 165 return self.lineno 166 def get_extra(self): 167 return self.extra 168 def get_static(self): 169 return self.static 170 def get_conditionals(self): 171 return self.conditionals 172 173 def update(self, header, module, type = None, info = None, extra=None, 174 conditionals=None): 175 if self.name == debugsym: 176 print("=> update %s : %s" % (debugsym, (module, type, info, 177 extra, conditionals))) 178 if header != None and self.header == None: 179 self.set_header(module) 180 if module != None and (self.module == None or self.header == self.module): 181 self.set_module(module) 182 if type != None and self.type == None: 183 self.set_type(type) 184 if info != None: 185 self.set_info(info) 186 if extra != None: 187 self.set_extra(extra) 188 if conditionals != None: 189 self.set_conditionals(conditionals) 190 191class index: 192 def __init__(self, name = "noname"): 193 self.name = name 194 self.identifiers = {} 195 self.functions = {} 196 self.variables = {} 197 self.includes = {} 198 self.structs = {} 199 self.enums = {} 200 self.typedefs = {} 201 self.macros = {} 202 self.references = {} 203 self.info = {} 204 205 def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 206 if name[0:2] == '__': 207 return None 208 d = None 209 try: 210 d = self.identifiers[name] 211 d.update(header, module, type, lineno, info, extra, conditionals) 212 except: 213 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 214 self.identifiers[name] = d 215 216 if d != None and static == 1: 217 d.set_static(1) 218 219 if d != None and name != None and type != None: 220 self.references[name] = d 221 222 if name == debugsym: 223 print("New ref: %s" % (d)) 224 225 return d 226 227 def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): 228 if name[0:2] == '__': 229 return None 230 d = None 231 try: 232 d = self.identifiers[name] 233 d.update(header, module, type, lineno, info, extra, conditionals) 234 except: 235 d = identifier(name, header, module, type, lineno, info, extra, conditionals) 236 self.identifiers[name] = d 237 238 if d != None and static == 1: 239 d.set_static(1) 240 241 if d != None and name != None and type != None: 242 if type == "function": 243 self.functions[name] = d 244 elif type == "functype": 245 self.functions[name] = d 246 elif type == "variable": 247 self.variables[name] = d 248 elif type == "include": 249 self.includes[name] = d 250 elif type == "struct": 251 self.structs[name] = d 252 elif type == "enum": 253 self.enums[name] = d 254 elif type == "typedef": 255 self.typedefs[name] = d 256 elif type == "macro": 257 self.macros[name] = d 258 else: 259 print("Unable to register type ", type) 260 261 if name == debugsym: 262 print("New symbol: %s" % (d)) 263 264 return d 265 266 def merge(self, idx): 267 for id in list(idx.functions.keys()): 268 # 269 # macro might be used to override functions or variables 270 # definitions 271 # 272 if id in self.macros: 273 del self.macros[id] 274 if id in self.functions: 275 print("function %s from %s redeclared in %s" % ( 276 id, self.functions[id].header, idx.functions[id].header)) 277 else: 278 self.functions[id] = idx.functions[id] 279 self.identifiers[id] = idx.functions[id] 280 for id in list(idx.variables.keys()): 281 # 282 # macro might be used to override functions or variables 283 # definitions 284 # 285 if id in self.macros: 286 del self.macros[id] 287 if id in self.variables: 288 print("variable %s from %s redeclared in %s" % ( 289 id, self.variables[id].header, idx.variables[id].header)) 290 else: 291 self.variables[id] = idx.variables[id] 292 self.identifiers[id] = idx.variables[id] 293 for id in list(idx.structs.keys()): 294 if id in self.structs: 295 print("struct %s from %s redeclared in %s" % ( 296 id, self.structs[id].header, idx.structs[id].header)) 297 else: 298 self.structs[id] = idx.structs[id] 299 self.identifiers[id] = idx.structs[id] 300 for id in list(idx.typedefs.keys()): 301 if id in self.typedefs: 302 print("typedef %s from %s redeclared in %s" % ( 303 id, self.typedefs[id].header, idx.typedefs[id].header)) 304 else: 305 self.typedefs[id] = idx.typedefs[id] 306 self.identifiers[id] = idx.typedefs[id] 307 for id in list(idx.macros.keys()): 308 # 309 # macro might be used to override functions or variables 310 # definitions 311 # 312 if id in self.variables: 313 continue 314 if id in self.functions: 315 continue 316 if id in self.enums: 317 continue 318 if id in self.macros: 319 print("macro %s from %s redeclared in %s" % ( 320 id, self.macros[id].header, idx.macros[id].header)) 321 else: 322 self.macros[id] = idx.macros[id] 323 self.identifiers[id] = idx.macros[id] 324 for id in list(idx.enums.keys()): 325 if id in self.enums: 326 print("enum %s from %s redeclared in %s" % ( 327 id, self.enums[id].header, idx.enums[id].header)) 328 else: 329 self.enums[id] = idx.enums[id] 330 self.identifiers[id] = idx.enums[id] 331 332 def merge_public(self, idx): 333 for id in list(idx.functions.keys()): 334 if id in self.functions: 335 # check that function condition agrees with header 336 if idx.functions[id].conditionals != \ 337 self.functions[id].conditionals: 338 print("Header condition differs from Function for %s:" \ 339 % id) 340 print(" H: %s" % self.functions[id].conditionals) 341 print(" C: %s" % idx.functions[id].conditionals) 342 up = idx.functions[id] 343 self.functions[id].update(None, up.module, up.type, up.info, up.extra) 344 # else: 345 # print "Function %s from %s is not declared in headers" % ( 346 # id, idx.functions[id].module) 347 # TODO: do the same for variables. 348 349 def analyze_dict(self, type, dict): 350 count = 0 351 public = 0 352 for name in list(dict.keys()): 353 id = dict[name] 354 count = count + 1 355 if id.static == 0: 356 public = public + 1 357 if count != public: 358 print(" %d %s , %d public" % (count, type, public)) 359 elif count != 0: 360 print(" %d public %s" % (count, type)) 361 362 363 def analyze(self): 364 self.analyze_dict("functions", self.functions) 365 self.analyze_dict("variables", self.variables) 366 self.analyze_dict("structs", self.structs) 367 self.analyze_dict("typedefs", self.typedefs) 368 self.analyze_dict("macros", self.macros) 369 370class CLexer: 371 """A lexer for the C language, tokenize the input by reading and 372 analyzing it line by line""" 373 def __init__(self, input): 374 self.input = input 375 self.tokens = [] 376 self.line = "" 377 self.lineno = 0 378 379 def getline(self): 380 line = '' 381 while line == '': 382 line = self.input.readline() 383 if not line: 384 return None 385 self.lineno = self.lineno + 1 386 line = line.lstrip() 387 line = line.rstrip() 388 if line == '': 389 continue 390 while line[-1] == '\\': 391 line = line[:-1] 392 n = self.input.readline() 393 self.lineno = self.lineno + 1 394 n = n.lstrip() 395 n = n.rstrip() 396 if not n: 397 break 398 else: 399 line = line + n 400 return line 401 402 def getlineno(self): 403 return self.lineno 404 405 def push(self, token): 406 self.tokens.insert(0, token); 407 408 def debug(self): 409 print("Last token: ", self.last) 410 print("Token queue: ", self.tokens) 411 print("Line %d end: " % (self.lineno), self.line) 412 413 def token(self): 414 while self.tokens == []: 415 if self.line == "": 416 line = self.getline() 417 else: 418 line = self.line 419 self.line = "" 420 if line == None: 421 return None 422 423 if line[0] == '#': 424 self.tokens = list(map((lambda x: ('preproc', x)), 425 line.split())) 426 break; 427 l = len(line) 428 if line[0] == '"' or line[0] == "'": 429 end = line[0] 430 line = line[1:] 431 found = 0 432 tok = "" 433 while found == 0: 434 i = 0 435 l = len(line) 436 while i < l: 437 if line[i] == end: 438 self.line = line[i+1:] 439 line = line[:i] 440 l = i 441 found = 1 442 break 443 if line[i] == '\\': 444 i = i + 1 445 i = i + 1 446 tok = tok + line 447 if found == 0: 448 line = self.getline() 449 if line == None: 450 return None 451 self.last = ('string', tok) 452 return self.last 453 454 if l >= 2 and line[0] == '/' and line[1] == '*': 455 line = line[2:] 456 found = 0 457 tok = "" 458 while found == 0: 459 i = 0 460 l = len(line) 461 while i < l: 462 if line[i] == '*' and i+1 < l and line[i+1] == '/': 463 self.line = line[i+2:] 464 line = line[:i-1] 465 l = i 466 found = 1 467 break 468 i = i + 1 469 if tok != "": 470 tok = tok + "\n" 471 tok = tok + line 472 if found == 0: 473 line = self.getline() 474 if line == None: 475 return None 476 self.last = ('comment', tok) 477 return self.last 478 if l >= 2 and line[0] == '/' and line[1] == '/': 479 line = line[2:] 480 self.last = ('comment', line) 481 return self.last 482 i = 0 483 while i < l: 484 if line[i] == '/' and i+1 < l and line[i+1] == '/': 485 self.line = line[i:] 486 line = line[:i] 487 break 488 if line[i] == '/' and i+1 < l and line[i+1] == '*': 489 self.line = line[i:] 490 line = line[:i] 491 break 492 if line[i] == '"' or line[i] == "'": 493 self.line = line[i:] 494 line = line[:i] 495 break 496 i = i + 1 497 l = len(line) 498 i = 0 499 while i < l: 500 if line[i] == ' ' or line[i] == '\t': 501 i = i + 1 502 continue 503 o = ord(line[i]) 504 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 505 (o >= 48 and o <= 57): 506 s = i 507 while i < l: 508 o = ord(line[i]) 509 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 510 (o >= 48 and o <= 57) or \ 511 (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1: 512 i = i + 1 513 else: 514 break 515 self.tokens.append(('name', line[s:i])) 516 continue 517 if "(){}:;,[]".find(line[i]) != -1: 518# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ 519# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ 520# line[i] == ',' or line[i] == '[' or line[i] == ']': 521 self.tokens.append(('sep', line[i])) 522 i = i + 1 523 continue 524 if "+-*><=/%&!|.".find(line[i]) != -1: 525# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ 526# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ 527# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ 528# line[i] == '!' or line[i] == '|' or line[i] == '.': 529 if line[i] == '.' and i + 2 < l and \ 530 line[i+1] == '.' and line[i+2] == '.': 531 self.tokens.append(('name', '...')) 532 i = i + 3 533 continue 534 535 j = i + 1 536 if j < l and ( 537 "+-*><=/%&!|".find(line[j]) != -1): 538# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ 539# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ 540# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ 541# line[j] == '!' or line[j] == '|'): 542 self.tokens.append(('op', line[i:j+1])) 543 i = j + 1 544 else: 545 self.tokens.append(('op', line[i])) 546 i = i + 1 547 continue 548 s = i 549 while i < l: 550 o = ord(line[i]) 551 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 552 (o >= 48 and o <= 57) or ( 553 " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1): 554# line[i] != ' ' and line[i] != '\t' and 555# line[i] != '(' and line[i] != ')' and 556# line[i] != '{' and line[i] != '}' and 557# line[i] != ':' and line[i] != ';' and 558# line[i] != ',' and line[i] != '+' and 559# line[i] != '-' and line[i] != '*' and 560# line[i] != '/' and line[i] != '%' and 561# line[i] != '&' and line[i] != '!' and 562# line[i] != '|' and line[i] != '[' and 563# line[i] != ']' and line[i] != '=' and 564# line[i] != '*' and line[i] != '>' and 565# line[i] != '<'): 566 i = i + 1 567 else: 568 break 569 self.tokens.append(('name', line[s:i])) 570 571 tok = self.tokens[0] 572 self.tokens = self.tokens[1:] 573 self.last = tok 574 return tok 575 576class CParser: 577 """The C module parser""" 578 def __init__(self, filename, idx = None): 579 self.filename = filename 580 if len(filename) > 2 and filename[-2:] == '.h': 581 self.is_header = 1 582 else: 583 self.is_header = 0 584 self.input = open(filename) 585 self.lexer = CLexer(self.input) 586 if idx == None: 587 self.index = index() 588 else: 589 self.index = idx 590 self.top_comment = "" 591 self.last_comment = "" 592 self.comment = None 593 self.collect_ref = 0 594 self.no_error = 0 595 self.conditionals = [] 596 self.defines = [] 597 598 def collect_references(self): 599 self.collect_ref = 1 600 601 def stop_error(self): 602 self.no_error = 1 603 604 def start_error(self): 605 self.no_error = 0 606 607 def lineno(self): 608 return self.lexer.getlineno() 609 610 def index_add(self, name, module, static, type, info=None, extra = None): 611 if self.is_header == 1: 612 self.index.add(name, module, module, static, type, self.lineno(), 613 info, extra, self.conditionals) 614 else: 615 self.index.add(name, None, module, static, type, self.lineno(), 616 info, extra, self.conditionals) 617 618 def index_add_ref(self, name, module, static, type, info=None, 619 extra = None): 620 if self.is_header == 1: 621 self.index.add_ref(name, module, module, static, type, 622 self.lineno(), info, extra, self.conditionals) 623 else: 624 self.index.add_ref(name, None, module, static, type, self.lineno(), 625 info, extra, self.conditionals) 626 627 def warning(self, msg): 628 if self.no_error: 629 return 630 print(msg) 631 632 def error(self, msg, token=-1): 633 if self.no_error: 634 return 635 636 print("Parse Error: " + msg) 637 if token != -1: 638 print("Got token ", token) 639 self.lexer.debug() 640 sys.exit(1) 641 642 def debug(self, msg, token=-1): 643 print("Debug: " + msg) 644 if token != -1: 645 print("Got token ", token) 646 self.lexer.debug() 647 648 def parseTopComment(self, comment): 649 res = {} 650 lines = comment.split("\n") 651 item = None 652 for line in lines: 653 while line != "" and (line[0] == ' ' or line[0] == '\t'): 654 line = line[1:] 655 while line != "" and line[0] == '*': 656 line = line[1:] 657 while line != "" and (line[0] == ' ' or line[0] == '\t'): 658 line = line[1:] 659 try: 660 (it, line) = line.split(":", 1) 661 item = it 662 while line != "" and (line[0] == ' ' or line[0] == '\t'): 663 line = line[1:] 664 if item in res: 665 res[item] = res[item] + " " + line 666 else: 667 res[item] = line 668 except: 669 if item != None: 670 if item in res: 671 res[item] = res[item] + " " + line 672 else: 673 res[item] = line 674 self.index.info = res 675 676 def parseComment(self, token): 677 if self.top_comment == "": 678 self.top_comment = token[1] 679 if self.comment == None or token[1][0] == '*': 680 self.comment = token[1]; 681 else: 682 self.comment = self.comment + token[1] 683 token = self.lexer.token() 684 685 if self.comment.find("DOC_DISABLE") != -1: 686 self.stop_error() 687 688 if self.comment.find("DOC_ENABLE") != -1: 689 self.start_error() 690 691 return token 692 693 # 694 # Parse a comment block associate to a typedef 695 # 696 def parseTypeComment(self, name, quiet = 0): 697 if name[0:2] == '__': 698 quiet = 1 699 700 args = [] 701 desc = "" 702 703 if self.comment == None: 704 if not quiet: 705 self.warning("Missing comment for type %s" % (name)) 706 return((args, desc)) 707 if self.comment[0] != '*': 708 if not quiet: 709 self.warning("Missing * in type comment for %s" % (name)) 710 return((args, desc)) 711 lines = self.comment.split('\n') 712 if lines[0] == '*': 713 del lines[0] 714 if lines[0] != "* %s:" % (name): 715 if not quiet: 716 self.warning("Misformatted type comment for %s" % (name)) 717 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 718 return((args, desc)) 719 del lines[0] 720 while len(lines) > 0 and lines[0] == '*': 721 del lines[0] 722 desc = "" 723 while len(lines) > 0: 724 l = lines[0] 725 while len(l) > 0 and l[0] == '*': 726 l = l[1:] 727 l = l.strip() 728 desc = desc + " " + l 729 del lines[0] 730 731 desc = desc.strip() 732 733 if quiet == 0: 734 if desc == "": 735 self.warning("Type comment for %s lack description of the macro" % (name)) 736 737 return(desc) 738 # 739 # Parse a comment block associate to a macro 740 # 741 def parseMacroComment(self, name, quiet = 0): 742 if name[0:2] == '__': 743 quiet = 1 744 745 args = [] 746 desc = "" 747 748 if self.comment == None: 749 if not quiet: 750 self.warning("Missing comment for macro %s" % (name)) 751 return((args, desc)) 752 if self.comment[0] != '*': 753 if not quiet: 754 self.warning("Missing * in macro comment for %s" % (name)) 755 return((args, desc)) 756 lines = self.comment.split('\n') 757 if lines[0] == '*': 758 del lines[0] 759 if lines[0] != "* %s:" % (name): 760 if not quiet: 761 self.warning("Misformatted macro comment for %s" % (name)) 762 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 763 return((args, desc)) 764 del lines[0] 765 while lines[0] == '*': 766 del lines[0] 767 while len(lines) > 0 and lines[0][0:3] == '* @': 768 l = lines[0][3:] 769 try: 770 (arg, desc) = l.split(':', 1) 771 desc=desc.strip() 772 arg=arg.strip() 773 except: 774 if not quiet: 775 self.warning("Misformatted macro comment for %s" % (name)) 776 self.warning(" problem with '%s'" % (lines[0])) 777 del lines[0] 778 continue 779 del lines[0] 780 l = lines[0].strip() 781 while len(l) > 2 and l[0:3] != '* @': 782 while l[0] == '*': 783 l = l[1:] 784 desc = desc + ' ' + l.strip() 785 del lines[0] 786 if len(lines) == 0: 787 break 788 l = lines[0] 789 args.append((arg, desc)) 790 while len(lines) > 0 and lines[0] == '*': 791 del lines[0] 792 desc = "" 793 while len(lines) > 0: 794 l = lines[0] 795 while len(l) > 0 and l[0] == '*': 796 l = l[1:] 797 l = l.strip() 798 desc = desc + " " + l 799 del lines[0] 800 801 desc = desc.strip() 802 803 if quiet == 0: 804 if desc == "": 805 self.warning("Macro comment for %s lack description of the macro" % (name)) 806 807 return((args, desc)) 808 809 # 810 # Parse a comment block and merge the informations found in the 811 # parameters descriptions, finally returns a block as complete 812 # as possible 813 # 814 def mergeFunctionComment(self, name, description, quiet = 0): 815 if name == 'main': 816 quiet = 1 817 if name[0:2] == '__': 818 quiet = 1 819 820 (ret, args) = description 821 desc = "" 822 retdesc = "" 823 824 if self.comment == None: 825 if not quiet: 826 self.warning("Missing comment for function %s" % (name)) 827 return(((ret[0], retdesc), args, desc)) 828 if self.comment[0] != '*': 829 if not quiet: 830 self.warning("Missing * in function comment for %s" % (name)) 831 return(((ret[0], retdesc), args, desc)) 832 lines = self.comment.split('\n') 833 if lines[0] == '*': 834 del lines[0] 835 if lines[0] != "* %s:" % (name): 836 if not quiet: 837 self.warning("Misformatted function comment for %s" % (name)) 838 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) 839 return(((ret[0], retdesc), args, desc)) 840 del lines[0] 841 while lines[0] == '*': 842 del lines[0] 843 nbargs = len(args) 844 while len(lines) > 0 and lines[0][0:3] == '* @': 845 l = lines[0][3:] 846 try: 847 (arg, desc) = l.split(':', 1) 848 desc=desc.strip() 849 arg=arg.strip() 850 except: 851 if not quiet: 852 self.warning("Misformatted function comment for %s" % (name)) 853 self.warning(" problem with '%s'" % (lines[0])) 854 del lines[0] 855 continue 856 del lines[0] 857 l = lines[0].strip() 858 while len(l) > 2 and l[0:3] != '* @': 859 while l[0] == '*': 860 l = l[1:] 861 desc = desc + ' ' + l.strip() 862 del lines[0] 863 if len(lines) == 0: 864 break 865 l = lines[0] 866 i = 0 867 while i < nbargs: 868 if args[i][1] == arg: 869 args[i] = (args[i][0], arg, desc) 870 break; 871 i = i + 1 872 if i >= nbargs: 873 if not quiet: 874 self.warning("Unable to find arg %s from function comment for %s" % ( 875 arg, name)) 876 while len(lines) > 0 and lines[0] == '*': 877 del lines[0] 878 desc = "" 879 while len(lines) > 0: 880 l = lines[0] 881 while len(l) > 0 and l[0] == '*': 882 l = l[1:] 883 l = l.strip() 884 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": 885 try: 886 l = l.split(' ', 1)[1] 887 except: 888 l = "" 889 retdesc = l.strip() 890 del lines[0] 891 while len(lines) > 0: 892 l = lines[0] 893 while len(l) > 0 and l[0] == '*': 894 l = l[1:] 895 l = l.strip() 896 retdesc = retdesc + " " + l 897 del lines[0] 898 else: 899 desc = desc + " " + l 900 del lines[0] 901 902 retdesc = retdesc.strip() 903 desc = desc.strip() 904 905 if quiet == 0: 906 # 907 # report missing comments 908 # 909 i = 0 910 while i < nbargs: 911 if args[i][2] == None and args[i][0] != "void" and \ 912 ((args[i][1] != None) or (args[i][1] == '')): 913 self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1])) 914 i = i + 1 915 if retdesc == "" and ret[0] != "void": 916 self.warning("Function comment for %s lacks description of return value" % (name)) 917 if desc == "": 918 self.warning("Function comment for %s lacks description of the function" % (name)) 919 920 return(((ret[0], retdesc), args, desc)) 921 922 def parsePreproc(self, token): 923 if debug: 924 print("=> preproc ", token, self.lexer.tokens) 925 name = token[1] 926 if name == "#include": 927 token = self.lexer.token() 928 if token == None: 929 return None 930 if token[0] == 'preproc': 931 self.index_add(token[1], self.filename, not self.is_header, 932 "include") 933 return self.lexer.token() 934 return token 935 if name == "#define": 936 token = self.lexer.token() 937 if token == None: 938 return None 939 if token[0] == 'preproc': 940 # TODO macros with arguments 941 name = token[1] 942 lst = [] 943 token = self.lexer.token() 944 while token != None and token[0] == 'preproc' and \ 945 token[1][0] != '#': 946 lst.append(token[1]) 947 token = self.lexer.token() 948 try: 949 name = name.split('(') [0] 950 except: 951 pass 952 info = self.parseMacroComment(name, not self.is_header) 953 self.index_add(name, self.filename, not self.is_header, 954 "macro", info) 955 return token 956 957 # 958 # Processing of conditionals modified by Bill 1/1/05 959 # 960 # We process conditionals (i.e. tokens from #ifdef, #ifndef, 961 # #if, #else and #endif) for headers and mainline code, 962 # store the ones from the header in libxml2-api.xml, and later 963 # (in the routine merge_public) verify that the two (header and 964 # mainline code) agree. 965 # 966 # There is a small problem with processing the headers. Some of 967 # the variables are not concerned with enabling / disabling of 968 # library functions (e.g. '__XML_PARSER_H__'), and we don't want 969 # them to be included in libxml2-api.xml, or involved in 970 # the check between the header and the mainline code. To 971 # accomplish this, we ignore any conditional which doesn't include 972 # the string 'ENABLED' 973 # 974 if name == "#ifdef": 975 apstr = self.lexer.tokens[0][1] 976 try: 977 self.defines.append(apstr) 978 if apstr.find('ENABLED') != -1: 979 self.conditionals.append("defined(%s)" % apstr) 980 except: 981 pass 982 elif name == "#ifndef": 983 apstr = self.lexer.tokens[0][1] 984 try: 985 self.defines.append(apstr) 986 if apstr.find('ENABLED') != -1: 987 self.conditionals.append("!defined(%s)" % apstr) 988 except: 989 pass 990 elif name == "#if": 991 apstr = "" 992 for tok in self.lexer.tokens: 993 if apstr != "": 994 apstr = apstr + " " 995 apstr = apstr + tok[1] 996 try: 997 self.defines.append(apstr) 998 if apstr.find('ENABLED') != -1: 999 self.conditionals.append(apstr) 1000 except: 1001 pass 1002 elif name == "#else": 1003 if self.conditionals != [] and \ 1004 self.defines[-1].find('ENABLED') != -1: 1005 self.conditionals[-1] = "!(%s)" % self.conditionals[-1] 1006 elif name == "#endif": 1007 if self.conditionals != [] and \ 1008 self.defines[-1].find('ENABLED') != -1: 1009 self.conditionals = self.conditionals[:-1] 1010 self.defines = self.defines[:-1] 1011 token = self.lexer.token() 1012 while token != None and token[0] == 'preproc' and \ 1013 token[1][0] != '#': 1014 token = self.lexer.token() 1015 return token 1016 1017 # 1018 # token acquisition on top of the lexer, it handle internally 1019 # preprocessor and comments since they are logically not part of 1020 # the program structure. 1021 # 1022 def token(self): 1023 global ignored_words 1024 1025 token = self.lexer.token() 1026 while token != None: 1027 if token[0] == 'comment': 1028 token = self.parseComment(token) 1029 continue 1030 elif token[0] == 'preproc': 1031 token = self.parsePreproc(token) 1032 continue 1033 elif token[0] == "name" and token[1] == "__const": 1034 token = ("name", "const") 1035 return token 1036 elif token[0] == "name" and token[1] == "__attribute": 1037 token = self.lexer.token() 1038 while token != None and token[1] != ";": 1039 token = self.lexer.token() 1040 return token 1041 elif token[0] == "name" and token[1] in ignored_words: 1042 (n, info) = ignored_words[token[1]] 1043 i = 0 1044 while i < n: 1045 token = self.lexer.token() 1046 i = i + 1 1047 token = self.lexer.token() 1048 continue 1049 else: 1050 if debug: 1051 print("=> ", token) 1052 return token 1053 return None 1054 1055 # 1056 # Parse a typedef, it records the type and its name. 1057 # 1058 def parseTypedef(self, token): 1059 if token == None: 1060 return None 1061 token = self.parseType(token) 1062 if token == None: 1063 self.error("parsing typedef") 1064 return None 1065 base_type = self.type 1066 type = base_type 1067 #self.debug("end typedef type", token) 1068 while token != None: 1069 if token[0] == "name": 1070 name = token[1] 1071 signature = self.signature 1072 if signature != None: 1073 type = type.split('(')[0] 1074 d = self.mergeFunctionComment(name, 1075 ((type, None), signature), 1) 1076 self.index_add(name, self.filename, not self.is_header, 1077 "functype", d) 1078 else: 1079 if base_type == "struct": 1080 self.index_add(name, self.filename, not self.is_header, 1081 "struct", type) 1082 base_type = "struct " + name 1083 else: 1084 # TODO report missing or misformatted comments 1085 info = self.parseTypeComment(name, 1) 1086 self.index_add(name, self.filename, not self.is_header, 1087 "typedef", type, info) 1088 token = self.token() 1089 else: 1090 self.error("parsing typedef: expecting a name") 1091 return token 1092 #self.debug("end typedef", token) 1093 if token != None and token[0] == 'sep' and token[1] == ',': 1094 type = base_type 1095 token = self.token() 1096 while token != None and token[0] == "op": 1097 type = type + token[1] 1098 token = self.token() 1099 elif token != None and token[0] == 'sep' and token[1] == ';': 1100 break; 1101 elif token != None and token[0] == 'name': 1102 type = base_type 1103 continue; 1104 else: 1105 self.error("parsing typedef: expecting ';'", token) 1106 return token 1107 token = self.token() 1108 return token 1109 1110 # 1111 # Parse a C code block, used for functions it parse till 1112 # the balancing } included 1113 # 1114 def parseBlock(self, token): 1115 while token != None: 1116 if token[0] == "sep" and token[1] == "{": 1117 token = self.token() 1118 token = self.parseBlock(token) 1119 elif token[0] == "sep" and token[1] == "}": 1120 self.comment = None 1121 token = self.token() 1122 return token 1123 else: 1124 if self.collect_ref == 1: 1125 oldtok = token 1126 token = self.token() 1127 if oldtok[0] == "name" and oldtok[1][0:3] == "xml": 1128 if token[0] == "sep" and token[1] == "(": 1129 self.index_add_ref(oldtok[1], self.filename, 1130 0, "function") 1131 token = self.token() 1132 elif token[0] == "name": 1133 token = self.token() 1134 if token[0] == "sep" and (token[1] == ";" or 1135 token[1] == "," or token[1] == "="): 1136 self.index_add_ref(oldtok[1], self.filename, 1137 0, "type") 1138 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": 1139 self.index_add_ref(oldtok[1], self.filename, 1140 0, "typedef") 1141 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": 1142 self.index_add_ref(oldtok[1], self.filename, 1143 0, "typedef") 1144 1145 else: 1146 token = self.token() 1147 return token 1148 1149 # 1150 # Parse a C struct definition till the balancing } 1151 # 1152 def parseStruct(self, token): 1153 fields = [] 1154 #self.debug("start parseStruct", token) 1155 while token != None: 1156 if token[0] == "sep" and token[1] == "{": 1157 token = self.token() 1158 token = self.parseTypeBlock(token) 1159 elif token[0] == "sep" and token[1] == "}": 1160 self.struct_fields = fields 1161 #self.debug("end parseStruct", token) 1162 #print fields 1163 token = self.token() 1164 return token 1165 else: 1166 base_type = self.type 1167 #self.debug("before parseType", token) 1168 token = self.parseType(token) 1169 #self.debug("after parseType", token) 1170 if token != None and token[0] == "name": 1171 fname = token[1] 1172 token = self.token() 1173 if token[0] == "sep" and token[1] == ";": 1174 self.comment = None 1175 token = self.token() 1176 fields.append((self.type, fname, self.comment)) 1177 self.comment = None 1178 else: 1179 self.error("parseStruct: expecting ;", token) 1180 elif token != None and token[0] == "sep" and token[1] == "{": 1181 token = self.token() 1182 token = self.parseTypeBlock(token) 1183 if token != None and token[0] == "name": 1184 token = self.token() 1185 if token != None and token[0] == "sep" and token[1] == ";": 1186 token = self.token() 1187 else: 1188 self.error("parseStruct: expecting ;", token) 1189 else: 1190 self.error("parseStruct: name", token) 1191 token = self.token() 1192 self.type = base_type; 1193 self.struct_fields = fields 1194 #self.debug("end parseStruct", token) 1195 #print fields 1196 return token 1197 1198 # 1199 # Parse a C enum block, parse till the balancing } 1200 # 1201 def parseEnumBlock(self, token): 1202 self.enums = [] 1203 name = None 1204 self.comment = None 1205 comment = "" 1206 value = "0" 1207 while token != None: 1208 if token[0] == "sep" and token[1] == "{": 1209 token = self.token() 1210 token = self.parseTypeBlock(token) 1211 elif token[0] == "sep" and token[1] == "}": 1212 if name != None: 1213 if self.comment != None: 1214 comment = self.comment 1215 self.comment = None 1216 self.enums.append((name, value, comment)) 1217 token = self.token() 1218 return token 1219 elif token[0] == "name": 1220 if name != None: 1221 if self.comment != None: 1222 comment = self.comment.strip() 1223 self.comment = None 1224 self.enums.append((name, value, comment)) 1225 name = token[1] 1226 comment = "" 1227 token = self.token() 1228 if token[0] == "op" and token[1][0] == "=": 1229 value = "" 1230 if len(token[1]) > 1: 1231 value = token[1][1:] 1232 token = self.token() 1233 while token[0] != "sep" or (token[1] != ',' and 1234 token[1] != '}'): 1235 value = value + token[1] 1236 token = self.token() 1237 else: 1238 try: 1239 value = "%d" % (int(value) + 1) 1240 except: 1241 self.warning("Failed to compute value of enum %s" % (name)) 1242 value="" 1243 if token[0] == "sep" and token[1] == ",": 1244 token = self.token() 1245 else: 1246 token = self.token() 1247 return token 1248 1249 # 1250 # Parse a C definition block, used for structs it parse till 1251 # the balancing } 1252 # 1253 def parseTypeBlock(self, token): 1254 while token != None: 1255 if token[0] == "sep" and token[1] == "{": 1256 token = self.token() 1257 token = self.parseTypeBlock(token) 1258 elif token[0] == "sep" and token[1] == "}": 1259 token = self.token() 1260 return token 1261 else: 1262 token = self.token() 1263 return token 1264 1265 # 1266 # Parse a type: the fact that the type name can either occur after 1267 # the definition or within the definition makes it a little harder 1268 # if inside, the name token is pushed back before returning 1269 # 1270 def parseType(self, token): 1271 self.type = "" 1272 self.struct_fields = [] 1273 self.signature = None 1274 if token == None: 1275 return token 1276 1277 while token[0] == "name" and ( 1278 token[1] == "const" or \ 1279 token[1] == "unsigned" or \ 1280 token[1] == "signed"): 1281 if self.type == "": 1282 self.type = token[1] 1283 else: 1284 self.type = self.type + " " + token[1] 1285 token = self.token() 1286 1287 if token[0] == "name" and (token[1] == "long" or token[1] == "short"): 1288 if self.type == "": 1289 self.type = token[1] 1290 else: 1291 self.type = self.type + " " + token[1] 1292 if token[0] == "name" and token[1] == "int": 1293 if self.type == "": 1294 self.type = tmp[1] 1295 else: 1296 self.type = self.type + " " + tmp[1] 1297 1298 elif token[0] == "name" and token[1] == "struct": 1299 if self.type == "": 1300 self.type = token[1] 1301 else: 1302 self.type = self.type + " " + token[1] 1303 token = self.token() 1304 nametok = None 1305 if token[0] == "name": 1306 nametok = token 1307 token = self.token() 1308 if token != None and token[0] == "sep" and token[1] == "{": 1309 token = self.token() 1310 token = self.parseStruct(token) 1311 elif token != None and token[0] == "op" and token[1] == "*": 1312 self.type = self.type + " " + nametok[1] + " *" 1313 token = self.token() 1314 while token != None and token[0] == "op" and token[1] == "*": 1315 self.type = self.type + " *" 1316 token = self.token() 1317 if token[0] == "name": 1318 nametok = token 1319 token = self.token() 1320 else: 1321 self.error("struct : expecting name", token) 1322 return token 1323 elif token != None and token[0] == "name" and nametok != None: 1324 self.type = self.type + " " + nametok[1] 1325 return token 1326 1327 if nametok != None: 1328 self.lexer.push(token) 1329 token = nametok 1330 return token 1331 1332 elif token[0] == "name" and token[1] == "enum": 1333 if self.type == "": 1334 self.type = token[1] 1335 else: 1336 self.type = self.type + " " + token[1] 1337 self.enums = [] 1338 token = self.token() 1339 if token != None and token[0] == "sep" and token[1] == "{": 1340 token = self.token() 1341 token = self.parseEnumBlock(token) 1342 else: 1343 self.error("parsing enum: expecting '{'", token) 1344 enum_type = None 1345 if token != None and token[0] != "name": 1346 self.lexer.push(token) 1347 token = ("name", "enum") 1348 else: 1349 enum_type = token[1] 1350 for enum in self.enums: 1351 self.index_add(enum[0], self.filename, 1352 not self.is_header, "enum", 1353 (enum[1], enum[2], enum_type)) 1354 return token 1355 1356 elif token[0] == "name": 1357 if self.type == "": 1358 self.type = token[1] 1359 else: 1360 self.type = self.type + " " + token[1] 1361 else: 1362 self.error("parsing type %s: expecting a name" % (self.type), 1363 token) 1364 return token 1365 token = self.token() 1366 while token != None and (token[0] == "op" or 1367 token[0] == "name" and token[1] == "const"): 1368 self.type = self.type + " " + token[1] 1369 token = self.token() 1370 1371 # 1372 # if there is a parenthesis here, this means a function type 1373 # 1374 if token != None and token[0] == "sep" and token[1] == '(': 1375 self.type = self.type + token[1] 1376 token = self.token() 1377 while token != None and token[0] == "op" and token[1] == '*': 1378 self.type = self.type + token[1] 1379 token = self.token() 1380 if token == None or token[0] != "name" : 1381 self.error("parsing function type, name expected", token); 1382 return token 1383 self.type = self.type + token[1] 1384 nametok = token 1385 token = self.token() 1386 if token != None and token[0] == "sep" and token[1] == ')': 1387 self.type = self.type + token[1] 1388 token = self.token() 1389 if token != None and token[0] == "sep" and token[1] == '(': 1390 token = self.token() 1391 type = self.type; 1392 token = self.parseSignature(token); 1393 self.type = type; 1394 else: 1395 self.error("parsing function type, '(' expected", token); 1396 return token 1397 else: 1398 self.error("parsing function type, ')' expected", token); 1399 return token 1400 self.lexer.push(token) 1401 token = nametok 1402 return token 1403 1404 # 1405 # do some lookahead for arrays 1406 # 1407 if token != None and token[0] == "name": 1408 nametok = token 1409 token = self.token() 1410 if token != None and token[0] == "sep" and token[1] == '[': 1411 self.type = self.type + nametok[1] 1412 while token != None and token[0] == "sep" and token[1] == '[': 1413 self.type = self.type + token[1] 1414 token = self.token() 1415 while token != None and token[0] != 'sep' and \ 1416 token[1] != ']' and token[1] != ';': 1417 self.type = self.type + token[1] 1418 token = self.token() 1419 if token != None and token[0] == 'sep' and token[1] == ']': 1420 self.type = self.type + token[1] 1421 token = self.token() 1422 else: 1423 self.error("parsing array type, ']' expected", token); 1424 return token 1425 elif token != None and token[0] == "sep" and token[1] == ':': 1426 # remove :12 in case it's a limited int size 1427 token = self.token() 1428 token = self.token() 1429 self.lexer.push(token) 1430 token = nametok 1431 1432 return token 1433 1434 # 1435 # Parse a signature: '(' has been parsed and we scan the type definition 1436 # up to the ')' included 1437 def parseSignature(self, token): 1438 signature = [] 1439 if token != None and token[0] == "sep" and token[1] == ')': 1440 self.signature = [] 1441 token = self.token() 1442 return token 1443 while token != None: 1444 token = self.parseType(token) 1445 if token != None and token[0] == "name": 1446 signature.append((self.type, token[1], None)) 1447 token = self.token() 1448 elif token != None and token[0] == "sep" and token[1] == ',': 1449 token = self.token() 1450 continue 1451 elif token != None and token[0] == "sep" and token[1] == ')': 1452 # only the type was provided 1453 if self.type == "...": 1454 signature.append((self.type, "...", None)) 1455 else: 1456 signature.append((self.type, None, None)) 1457 if token != None and token[0] == "sep": 1458 if token[1] == ',': 1459 token = self.token() 1460 continue 1461 elif token[1] == ')': 1462 token = self.token() 1463 break 1464 self.signature = signature 1465 return token 1466 1467 # 1468 # Parse a global definition, be it a type, variable or function 1469 # the extern "C" blocks are a bit nasty and require it to recurse. 1470 # 1471 def parseGlobal(self, token): 1472 static = 0 1473 if token[1] == 'extern': 1474 token = self.token() 1475 if token == None: 1476 return token 1477 if token[0] == 'string': 1478 if token[1] == 'C': 1479 token = self.token() 1480 if token == None: 1481 return token 1482 if token[0] == 'sep' and token[1] == "{": 1483 token = self.token() 1484# print 'Entering extern "C line ', self.lineno() 1485 while token != None and (token[0] != 'sep' or 1486 token[1] != "}"): 1487 if token[0] == 'name': 1488 token = self.parseGlobal(token) 1489 else: 1490 self.error( 1491 "token %s %s unexpected at the top level" % ( 1492 token[0], token[1])) 1493 token = self.parseGlobal(token) 1494# print 'Exiting extern "C" line', self.lineno() 1495 token = self.token() 1496 return token 1497 else: 1498 return token 1499 elif token[1] == 'static': 1500 static = 1 1501 token = self.token() 1502 if token == None or token[0] != 'name': 1503 return token 1504 1505 if token[1] == 'typedef': 1506 token = self.token() 1507 return self.parseTypedef(token) 1508 else: 1509 token = self.parseType(token) 1510 type_orig = self.type 1511 if token == None or token[0] != "name": 1512 return token 1513 type = type_orig 1514 self.name = token[1] 1515 token = self.token() 1516 while token != None and (token[0] == "sep" or token[0] == "op"): 1517 if token[0] == "sep": 1518 if token[1] == "[": 1519 type = type + token[1] 1520 token = self.token() 1521 while token != None and (token[0] != "sep" or \ 1522 token[1] != ";"): 1523 type = type + token[1] 1524 token = self.token() 1525 1526 if token != None and token[0] == "op" and token[1] == "=": 1527 # 1528 # Skip the initialization of the variable 1529 # 1530 token = self.token() 1531 if token[0] == 'sep' and token[1] == '{': 1532 token = self.token() 1533 token = self.parseBlock(token) 1534 else: 1535 self.comment = None 1536 while token != None and (token[0] != "sep" or \ 1537 (token[1] != ';' and token[1] != ',')): 1538 token = self.token() 1539 self.comment = None 1540 if token == None or token[0] != "sep" or (token[1] != ';' and 1541 token[1] != ','): 1542 self.error("missing ';' or ',' after value") 1543 1544 if token != None and token[0] == "sep": 1545 if token[1] == ";": 1546 self.comment = None 1547 token = self.token() 1548 if type == "struct": 1549 self.index_add(self.name, self.filename, 1550 not self.is_header, "struct", self.struct_fields) 1551 else: 1552 self.index_add(self.name, self.filename, 1553 not self.is_header, "variable", type) 1554 break 1555 elif token[1] == "(": 1556 token = self.token() 1557 token = self.parseSignature(token) 1558 if token == None: 1559 return None 1560 if token[0] == "sep" and token[1] == ";": 1561 d = self.mergeFunctionComment(self.name, 1562 ((type, None), self.signature), 1) 1563 self.index_add(self.name, self.filename, static, 1564 "function", d) 1565 token = self.token() 1566 elif token[0] == "sep" and token[1] == "{": 1567 d = self.mergeFunctionComment(self.name, 1568 ((type, None), self.signature), static) 1569 self.index_add(self.name, self.filename, static, 1570 "function", d) 1571 token = self.token() 1572 token = self.parseBlock(token); 1573 elif token[1] == ',': 1574 self.comment = None 1575 self.index_add(self.name, self.filename, static, 1576 "variable", type) 1577 type = type_orig 1578 token = self.token() 1579 while token != None and token[0] == "sep": 1580 type = type + token[1] 1581 token = self.token() 1582 if token != None and token[0] == "name": 1583 self.name = token[1] 1584 token = self.token() 1585 else: 1586 break 1587 1588 return token 1589 1590 def parse(self): 1591 self.warning("Parsing %s" % (self.filename)) 1592 token = self.token() 1593 while token != None: 1594 if token[0] == 'name': 1595 token = self.parseGlobal(token) 1596 else: 1597 self.error("token %s %s unexpected at the top level" % ( 1598 token[0], token[1])) 1599 token = self.parseGlobal(token) 1600 return 1601 self.parseTopComment(self.top_comment) 1602 return self.index 1603 1604 1605class docBuilder: 1606 """A documentation builder""" 1607 def __init__(self, name, directories=['.'], excludes=[]): 1608 self.name = name 1609 self.directories = directories 1610 self.excludes = excludes + list(ignored_files.keys()) 1611 self.modules = {} 1612 self.headers = {} 1613 self.idx = index() 1614 self.xref = {} 1615 self.index = {} 1616 if name == 'libxml2': 1617 self.basename = 'libxml' 1618 else: 1619 self.basename = name 1620 1621 def indexString(self, id, str): 1622 if str == None: 1623 return 1624 str = str.replace("'", ' ') 1625 str = str.replace('"', ' ') 1626 str = str.replace("/", ' ') 1627 str = str.replace('*', ' ') 1628 str = str.replace("[", ' ') 1629 str = str.replace("]", ' ') 1630 str = str.replace("(", ' ') 1631 str = str.replace(")", ' ') 1632 str = str.replace("<", ' ') 1633 str = str.replace('>', ' ') 1634 str = str.replace("&", ' ') 1635 str = str.replace('#', ' ') 1636 str = str.replace(",", ' ') 1637 str = str.replace('.', ' ') 1638 str = str.replace(';', ' ') 1639 tokens = str.split() 1640 for token in tokens: 1641 try: 1642 c = token[0] 1643 if string.ascii_letters.find(c) < 0: 1644 pass 1645 elif len(token) < 3: 1646 pass 1647 else: 1648 lower = token.lower() 1649 # TODO: generalize this a bit 1650 if lower == 'and' or lower == 'the': 1651 pass 1652 elif token in self.xref: 1653 self.xref[token].append(id) 1654 else: 1655 self.xref[token] = [id] 1656 except: 1657 pass 1658 1659 def analyze(self): 1660 print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys())))) 1661 self.idx.analyze() 1662 1663 def scanHeaders(self): 1664 for header in list(self.headers.keys()): 1665 parser = CParser(header) 1666 idx = parser.parse() 1667 self.headers[header] = idx; 1668 self.idx.merge(idx) 1669 1670 def scanModules(self): 1671 for module in list(self.modules.keys()): 1672 parser = CParser(module) 1673 idx = parser.parse() 1674 # idx.analyze() 1675 self.modules[module] = idx 1676 self.idx.merge_public(idx) 1677 1678 def scan(self): 1679 for directory in self.directories: 1680 files = glob.glob(directory + "/*.c") 1681 for file in files: 1682 skip = 0 1683 for excl in self.excludes: 1684 if file.find(excl) != -1: 1685 print("Skipping %s" % file) 1686 skip = 1 1687 break 1688 if skip == 0: 1689 self.modules[file] = None; 1690 files = glob.glob(directory + "/*.h") 1691 for file in files: 1692 skip = 0 1693 for excl in self.excludes: 1694 if file.find(excl) != -1: 1695 print("Skipping %s" % file) 1696 skip = 1 1697 break 1698 if skip == 0: 1699 self.headers[file] = None; 1700 self.scanHeaders() 1701 self.scanModules() 1702 1703 def modulename_file(self, file): 1704 module = os.path.basename(file) 1705 if module[-2:] == '.h': 1706 module = module[:-2] 1707 elif module[-2:] == '.c': 1708 module = module[:-2] 1709 return module 1710 1711 def serialize_enum(self, output, name): 1712 id = self.idx.enums[name] 1713 output.write(" <enum name='%s' file='%s'" % (name, 1714 self.modulename_file(id.header))) 1715 if id.info != None: 1716 info = id.info 1717 if info[0] != None and info[0] != '': 1718 try: 1719 val = eval(info[0]) 1720 except: 1721 val = info[0] 1722 output.write(" value='%s'" % (val)); 1723 if info[2] != None and info[2] != '': 1724 output.write(" type='%s'" % info[2]); 1725 if info[1] != None and info[1] != '': 1726 output.write(" info='%s'" % escape(info[1])); 1727 output.write("/>\n") 1728 1729 def serialize_macro(self, output, name): 1730 id = self.idx.macros[name] 1731 output.write(" <macro name='%s' file='%s'>\n" % (name, 1732 self.modulename_file(id.header))) 1733 if id.info != None: 1734 try: 1735 (args, desc) = id.info 1736 if desc != None and desc != "": 1737 output.write(" <info>%s</info>\n" % (escape(desc))) 1738 self.indexString(name, desc) 1739 for arg in args: 1740 (name, desc) = arg 1741 if desc != None and desc != "": 1742 output.write(" <arg name='%s' info='%s'/>\n" % ( 1743 name, escape(desc))) 1744 self.indexString(name, desc) 1745 else: 1746 output.write(" <arg name='%s'/>\n" % (name)) 1747 except: 1748 pass 1749 output.write(" </macro>\n") 1750 1751 def serialize_typedef(self, output, name): 1752 id = self.idx.typedefs[name] 1753 if id.info[0:7] == 'struct ': 1754 output.write(" <struct name='%s' file='%s' type='%s'" % ( 1755 name, self.modulename_file(id.header), id.info)) 1756 name = id.info[7:] 1757 if name in self.idx.structs and ( \ 1758 type(self.idx.structs[name].info) == type(()) or 1759 type(self.idx.structs[name].info) == type([])): 1760 output.write(">\n"); 1761 try: 1762 for field in self.idx.structs[name].info: 1763 desc = field[2] 1764 self.indexString(name, desc) 1765 if desc == None: 1766 desc = '' 1767 else: 1768 desc = escape(desc) 1769 output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc)) 1770 except: 1771 print("Failed to serialize struct %s" % (name)) 1772 output.write(" </struct>\n") 1773 else: 1774 output.write("/>\n"); 1775 else : 1776 output.write(" <typedef name='%s' file='%s' type='%s'" % ( 1777 name, self.modulename_file(id.header), id.info)) 1778 try: 1779 desc = id.extra 1780 if desc != None and desc != "": 1781 output.write(">\n <info>%s</info>\n" % (escape(desc))) 1782 output.write(" </typedef>\n") 1783 else: 1784 output.write("/>\n") 1785 except: 1786 output.write("/>\n") 1787 1788 def serialize_variable(self, output, name): 1789 id = self.idx.variables[name] 1790 if id.info != None: 1791 output.write(" <variable name='%s' file='%s' type='%s'/>\n" % ( 1792 name, self.modulename_file(id.header), id.info)) 1793 else: 1794 output.write(" <variable name='%s' file='%s'/>\n" % ( 1795 name, self.modulename_file(id.header))) 1796 1797 def serialize_function(self, output, name): 1798 id = self.idx.functions[name] 1799 if name == debugsym: 1800 print("=>", id) 1801 1802 output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type, 1803 name, self.modulename_file(id.header), 1804 self.modulename_file(id.module))) 1805 # 1806 # Processing of conditionals modified by Bill 1/1/05 1807 # 1808 if id.conditionals != None: 1809 apstr = "" 1810 for cond in id.conditionals: 1811 if apstr != "": 1812 apstr = apstr + " && " 1813 apstr = apstr + cond 1814 output.write(" <cond>%s</cond>\n"% (apstr)); 1815 try: 1816 (ret, params, desc) = id.info 1817 if (desc == None or desc == '') and \ 1818 name[0:9] != "xmlThrDef" and name != "xmlDllMain": 1819 print("%s %s from %s has no description" % (id.type, name, 1820 self.modulename_file(id.module))) 1821 1822 output.write(" <info>%s</info>\n" % (escape(desc))) 1823 self.indexString(name, desc) 1824 if ret[0] != None: 1825 if ret[0] == "void": 1826 output.write(" <return type='void'/>\n") 1827 else: 1828 output.write(" <return type='%s' info='%s'/>\n" % ( 1829 ret[0], escape(ret[1]))) 1830 self.indexString(name, ret[1]) 1831 for param in params: 1832 if param[0] == 'void': 1833 continue 1834 if param[2] == None: 1835 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0])) 1836 else: 1837 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2]))) 1838 self.indexString(name, param[2]) 1839 except: 1840 print("Failed to save function %s info: " % name, repr(id.info)) 1841 output.write(" </%s>\n" % (id.type)) 1842 1843 def serialize_exports(self, output, file): 1844 module = self.modulename_file(file) 1845 output.write(" <file name='%s'>\n" % (module)) 1846 dict = self.headers[file] 1847 if dict.info != None: 1848 for data in ('Summary', 'Description', 'Author'): 1849 try: 1850 output.write(" <%s>%s</%s>\n" % ( 1851 data.lower(), 1852 escape(dict.info[data]), 1853 data.lower())) 1854 except: 1855 print("Header %s lacks a %s description" % (module, data)) 1856 if 'Description' in dict.info: 1857 desc = dict.info['Description'] 1858 if desc.find("DEPRECATED") != -1: 1859 output.write(" <deprecated/>\n") 1860 1861 ids = list(dict.macros.keys()) 1862 ids.sort() 1863 for id in uniq(ids): 1864 # Macros are sometime used to masquerade other types. 1865 if id in dict.functions: 1866 continue 1867 if id in dict.variables: 1868 continue 1869 if id in dict.typedefs: 1870 continue 1871 if id in dict.structs: 1872 continue 1873 if id in dict.enums: 1874 continue 1875 output.write(" <exports symbol='%s' type='macro'/>\n" % (id)) 1876 ids = list(dict.enums.keys()) 1877 ids.sort() 1878 for id in uniq(ids): 1879 output.write(" <exports symbol='%s' type='enum'/>\n" % (id)) 1880 ids = list(dict.typedefs.keys()) 1881 ids.sort() 1882 for id in uniq(ids): 1883 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id)) 1884 ids = list(dict.structs.keys()) 1885 ids.sort() 1886 for id in uniq(ids): 1887 output.write(" <exports symbol='%s' type='struct'/>\n" % (id)) 1888 ids = list(dict.variables.keys()) 1889 ids.sort() 1890 for id in uniq(ids): 1891 output.write(" <exports symbol='%s' type='variable'/>\n" % (id)) 1892 ids = list(dict.functions.keys()) 1893 ids.sort() 1894 for id in uniq(ids): 1895 output.write(" <exports symbol='%s' type='function'/>\n" % (id)) 1896 output.write(" </file>\n") 1897 1898 def serialize_xrefs_files(self, output): 1899 headers = list(self.headers.keys()) 1900 headers.sort() 1901 for file in headers: 1902 module = self.modulename_file(file) 1903 output.write(" <file name='%s'>\n" % (module)) 1904 dict = self.headers[file] 1905 ids = uniq(list(dict.functions.keys()) + list(dict.variables.keys()) + \ 1906 list(dict.macros.keys()) + list(dict.typedefs.keys()) + \ 1907 list(dict.structs.keys()) + list(dict.enums.keys())) 1908 ids.sort() 1909 for id in ids: 1910 output.write(" <ref name='%s'/>\n" % (id)) 1911 output.write(" </file>\n") 1912 pass 1913 1914 def serialize_xrefs_functions(self, output): 1915 funcs = {} 1916 for name in list(self.idx.functions.keys()): 1917 id = self.idx.functions[name] 1918 try: 1919 (ret, params, desc) = id.info 1920 for param in params: 1921 if param[0] == 'void': 1922 continue 1923 if param[0] in funcs: 1924 funcs[param[0]].append(name) 1925 else: 1926 funcs[param[0]] = [name] 1927 except: 1928 pass 1929 typ = list(funcs.keys()) 1930 typ.sort() 1931 for type in typ: 1932 if type == '' or type == 'void' or type == "int" or \ 1933 type == "char *" or type == "const char *" : 1934 continue 1935 output.write(" <type name='%s'>\n" % (type)) 1936 ids = funcs[type] 1937 ids.sort() 1938 pid = '' # not sure why we have dups, but get rid of them! 1939 for id in ids: 1940 if id != pid: 1941 output.write(" <ref name='%s'/>\n" % (id)) 1942 pid = id 1943 output.write(" </type>\n") 1944 1945 def serialize_xrefs_constructors(self, output): 1946 funcs = {} 1947 for name in list(self.idx.functions.keys()): 1948 id = self.idx.functions[name] 1949 try: 1950 (ret, params, desc) = id.info 1951 if ret[0] == "void": 1952 continue 1953 if ret[0] in funcs: 1954 funcs[ret[0]].append(name) 1955 else: 1956 funcs[ret[0]] = [name] 1957 except: 1958 pass 1959 typ = list(funcs.keys()) 1960 typ.sort() 1961 for type in typ: 1962 if type == '' or type == 'void' or type == "int" or \ 1963 type == "char *" or type == "const char *" : 1964 continue 1965 output.write(" <type name='%s'>\n" % (type)) 1966 ids = funcs[type] 1967 ids.sort() 1968 for id in ids: 1969 output.write(" <ref name='%s'/>\n" % (id)) 1970 output.write(" </type>\n") 1971 1972 def serialize_xrefs_alpha(self, output): 1973 letter = None 1974 ids = list(self.idx.identifiers.keys()) 1975 ids.sort() 1976 for id in ids: 1977 if id[0] != letter: 1978 if letter != None: 1979 output.write(" </letter>\n") 1980 letter = id[0] 1981 output.write(" <letter name='%s'>\n" % (letter)) 1982 output.write(" <ref name='%s'/>\n" % (id)) 1983 if letter != None: 1984 output.write(" </letter>\n") 1985 1986 def serialize_xrefs_references(self, output): 1987 typ = list(self.idx.identifiers.keys()) 1988 typ.sort() 1989 for id in typ: 1990 idf = self.idx.identifiers[id] 1991 module = idf.header 1992 output.write(" <reference name='%s' href='%s'/>\n" % (id, 1993 'html/' + self.basename + '-' + 1994 self.modulename_file(module) + '.html#' + 1995 id)) 1996 1997 def serialize_xrefs_index(self, output): 1998 index = self.xref 1999 typ = list(index.keys()) 2000 typ.sort() 2001 letter = None 2002 count = 0 2003 chunk = 0 2004 chunks = [] 2005 for id in typ: 2006 if len(index[id]) > 30: 2007 continue 2008 if id[0] != letter: 2009 if letter == None or count > 200: 2010 if letter != None: 2011 output.write(" </letter>\n") 2012 output.write(" </chunk>\n") 2013 count = 0 2014 chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) 2015 output.write(" <chunk name='chunk%s'>\n" % (chunk)) 2016 first_letter = id[0] 2017 chunk = chunk + 1 2018 elif letter != None: 2019 output.write(" </letter>\n") 2020 letter = id[0] 2021 output.write(" <letter name='%s'>\n" % (letter)) 2022 output.write(" <word name='%s'>\n" % (id)) 2023 tokens = index[id]; 2024 tokens.sort() 2025 tok = None 2026 for token in tokens: 2027 if tok == token: 2028 continue 2029 tok = token 2030 output.write(" <ref name='%s'/>\n" % (token)) 2031 count = count + 1 2032 output.write(" </word>\n") 2033 if letter != None: 2034 output.write(" </letter>\n") 2035 output.write(" </chunk>\n") 2036 if count != 0: 2037 chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) 2038 output.write(" <chunks>\n") 2039 for ch in chunks: 2040 output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % ( 2041 ch[0], ch[1], ch[2])) 2042 output.write(" </chunks>\n") 2043 2044 def serialize_xrefs(self, output): 2045 output.write(" <references>\n") 2046 self.serialize_xrefs_references(output) 2047 output.write(" </references>\n") 2048 output.write(" <alpha>\n") 2049 self.serialize_xrefs_alpha(output) 2050 output.write(" </alpha>\n") 2051 output.write(" <constructors>\n") 2052 self.serialize_xrefs_constructors(output) 2053 output.write(" </constructors>\n") 2054 output.write(" <functions>\n") 2055 self.serialize_xrefs_functions(output) 2056 output.write(" </functions>\n") 2057 output.write(" <files>\n") 2058 self.serialize_xrefs_files(output) 2059 output.write(" </files>\n") 2060 output.write(" <index>\n") 2061 self.serialize_xrefs_index(output) 2062 output.write(" </index>\n") 2063 2064 def serialize(self): 2065 filename = "%s-api.xml" % self.name 2066 print("Saving XML description %s" % (filename)) 2067 output = open(filename, "w") 2068 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 2069 output.write("<api name='%s'>\n" % self.name) 2070 output.write(" <files>\n") 2071 headers = list(self.headers.keys()) 2072 headers.sort() 2073 for file in headers: 2074 self.serialize_exports(output, file) 2075 output.write(" </files>\n") 2076 output.write(" <symbols>\n") 2077 macros = list(self.idx.macros.keys()) 2078 macros.sort() 2079 for macro in macros: 2080 self.serialize_macro(output, macro) 2081 enums = list(self.idx.enums.keys()) 2082 enums.sort() 2083 for enum in enums: 2084 self.serialize_enum(output, enum) 2085 typedefs = list(self.idx.typedefs.keys()) 2086 typedefs.sort() 2087 for typedef in typedefs: 2088 self.serialize_typedef(output, typedef) 2089 variables = list(self.idx.variables.keys()) 2090 variables.sort() 2091 for variable in variables: 2092 self.serialize_variable(output, variable) 2093 functions = list(self.idx.functions.keys()) 2094 functions.sort() 2095 for function in functions: 2096 self.serialize_function(output, function) 2097 output.write(" </symbols>\n") 2098 output.write("</api>\n") 2099 output.close() 2100 2101 filename = "%s-refs.xml" % self.name 2102 print("Saving XML Cross References %s" % (filename)) 2103 output = open(filename, "w") 2104 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') 2105 output.write("<apirefs name='%s'>\n" % self.name) 2106 self.serialize_xrefs(output) 2107 output.write("</apirefs>\n") 2108 output.close() 2109 2110 2111def rebuild(): 2112 builder = None 2113 if glob.glob("parser.c") != [] : 2114 print("Rebuilding API description for libxml2") 2115 builder = docBuilder("libxml2", [".", "."], 2116 ["xmlwin32version.h", "tst.c"]) 2117 elif glob.glob("../parser.c") != [] : 2118 print("Rebuilding API description for libxml2") 2119 builder = docBuilder("libxml2", ["..", "../include/libxml"], 2120 ["xmlwin32version.h", "tst.c"]) 2121 elif glob.glob("../libxslt/transform.c") != [] : 2122 print("Rebuilding API description for libxslt") 2123 builder = docBuilder("libxslt", ["../libxslt"], 2124 ["win32config.h", "libxslt.h", "tst.c"]) 2125 else: 2126 print("rebuild() failed, unable to guess the module") 2127 return None 2128 builder.scan() 2129 builder.analyze() 2130 builder.serialize() 2131 if glob.glob("../libexslt/exslt.c") != [] : 2132 extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"]) 2133 extra.scan() 2134 extra.analyze() 2135 extra.serialize() 2136 return builder 2137 2138# 2139# for debugging the parser 2140# 2141def parse(filename): 2142 parser = CParser(filename) 2143 idx = parser.parse() 2144 return idx 2145 2146if __name__ == "__main__": 2147 if len(sys.argv) > 1: 2148 debug = 1 2149 parse(sys.argv[1]) 2150 else: 2151 rebuild() 2152