apibuild.py revision d8cf90610c63a695d948245cc4e22890e2c063be
1#!/usr/bin/python -u 2# 3# This is the API builder, it parses the C sources and build the 4# API formal description in XML. 5# 6# See Copyright for the status of this software. 7# 8# daniel@veillard.com 9# 10import os, sys 11import string 12import glob 13 14# 15# C parser analysis code 16# 17ignored_files = { 18 "trio": "too many non standard macros", 19 "trio.c": "too many non standard macros", 20 "trionan.c": "too many non standard macros", 21 "triostr.c": "too many non standard macros", 22 "acconfig.h": "generated portability layer", 23 "config.h": "generated portability layer", 24 "libxml.h": "internal only", 25 "testOOM.c": "out of memory tester", 26 "testOOMlib.h": "out of memory tester", 27 "testOOMlib.c": "out of memory tester", 28 "pattern.c": "not integrated yet", 29 "pattern.h": "not integrated yet", 30} 31 32ignored_words = { 33 "WINAPI": (0, "Windows keyword"), 34 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"), 35 "XMLPUBVAR": (0, "Special macro for extern vars for win32"), 36 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"), 37 "XMLCALL": (0, "Special macro for win32 calls"), 38 "__declspec": (3, "Windows keyword"), 39 "ATTRIBUTE_UNUSED": (0, "macro keyword"), 40 "LIBEXSLT_PUBLIC": (0, "macro keyword"), 41 "X_IN_Y": (5, "macro function builder"), 42} 43 44def escape(raw): 45 raw = string.replace(raw, '&', '&') 46 raw = string.replace(raw, '<', '<') 47 raw = string.replace(raw, '>', '>') 48 raw = string.replace(raw, "'", ''') 49 raw = string.replace(raw, '"', '"') 50 return raw 51 52class identifier: 53 def __init__(self, name, module=None, type=None, lineno = 0, 54 info=None, extra=None): 55 self.name = name 56 self.module = module 57 self.type = type 58 self.info = info 59 self.extra = extra 60 self.lineno = lineno 61 self.static = 0 62 63 def __repr__(self): 64 r = "%s %s:" % (self.type, self.name) 65 if self.static: 66 r = r + " static" 67 if self.module != None: 68 r = r + " from %s" % (self.module) 69 if 
self.info != None: 70 r = r + " " + `self.info` 71 if self.extra != None: 72 r = r + " " + `self.extra` 73 return r 74 75 76 def set_module(self, module): 77 self.module = module 78 def set_type(self, type): 79 self.type = type 80 def set_info(self, info): 81 self.info = info 82 def set_extra(self, extra): 83 self.extra = extra 84 def set_lineno(self, lineno): 85 self.lineno = lineno 86 def set_static(self, static): 87 self.static = static 88 89 def get_name(self): 90 return self.name 91 def get_module(self): 92 return self.module 93 def get_type(self): 94 return self.type 95 def get_info(self): 96 return self.info 97 def get_lineno(self): 98 return self.lineno 99 def get_extra(self): 100 return self.extra 101 def get_static(self): 102 return self.static 103 104 def update(self, module, type = None, info = None, extra=None): 105 if module != None and self.module == None: 106 self.set_module(module) 107 if type != None and self.type == None: 108 self.set_type(type) 109 if info != None: 110 self.set_info(info) 111 if extra != None: 112 self.set_extra(extra) 113 114 115class index: 116 def __init__(self, name = "noname"): 117 self.name = name 118 self.identifiers = {} 119 self.functions = {} 120 self.variables = {} 121 self.includes = {} 122 self.structs = {} 123 self.enums = {} 124 self.typedefs = {} 125 self.macros = {} 126 self.references = {} 127 128 def add_ref(self, name, module, static, type, lineno, info=None, extra=None): 129 if name[0:2] == '__': 130 return None 131 d = None 132 try: 133 d = self.identifiers[name] 134 d.update(module, type, lineno, info, extra) 135 except: 136 d = identifier(name, module, type, lineno, info, extra) 137 self.identifiers[name] = d 138 139 if d != None and static == 1: 140 d.set_static(1) 141 142 if d != None and name != None and type != None: 143 self.references[name] = d 144 145 def add(self, name, module, static, type, lineno, info=None, extra=None): 146 if name[0:2] == '__': 147 return None 148 d = None 149 try: 150 d = 
self.identifiers[name] 151 d.update(module, type, lineno, info, extra) 152 except: 153 d = identifier(name, module, type, lineno, info, extra) 154 self.identifiers[name] = d 155 156 if d != None and static == 1: 157 d.set_static(1) 158 159 if d != None and name != None and type != None: 160 if type == "function": 161 self.functions[name] = d 162 elif type == "functype": 163 self.functions[name] = d 164 elif type == "variable": 165 self.variables[name] = d 166 elif type == "include": 167 self.includes[name] = d 168 elif type == "struct": 169 self.structs[name] = d 170 elif type == "enum": 171 self.enums[name] = d 172 elif type == "typedef": 173 self.typedefs[name] = d 174 elif type == "macro": 175 self.macros[name] = d 176 else: 177 print "Unable to register type ", type 178 return d 179 180 def merge(self, idx): 181 for id in idx.functions.keys(): 182 # 183 # macro might be used to override functions or variables 184 # definitions 185 # 186 if self.macros.has_key(id): 187 del self.macros[id] 188 if self.functions.has_key(id): 189 print "function %s from %s redeclared in %s" % ( 190 id, self.functions[id].module, idx.functions[id].module) 191 else: 192 self.functions[id] = idx.functions[id] 193 self.identifiers[id] = idx.functions[id] 194 for id in idx.variables.keys(): 195 # 196 # macro might be used to override functions or variables 197 # definitions 198 # 199 if self.macros.has_key(id): 200 del self.macros[id] 201 if self.variables.has_key(id): 202 print "variable %s from %s redeclared in %s" % ( 203 id, self.variables[id].module, idx.variables[id].module) 204 else: 205 self.variables[id] = idx.variables[id] 206 self.identifiers[id] = idx.variables[id] 207 for id in idx.structs.keys(): 208 if self.structs.has_key(id): 209 print "struct %s from %s redeclared in %s" % ( 210 id, self.structs[id].module, idx.structs[id].module) 211 else: 212 self.structs[id] = idx.structs[id] 213 self.identifiers[id] = idx.structs[id] 214 for id in idx.typedefs.keys(): 215 if 
self.typedefs.has_key(id): 216 print "typedef %s from %s redeclared in %s" % ( 217 id, self.typedefs[id].module, idx.typedefs[id].module) 218 else: 219 self.typedefs[id] = idx.typedefs[id] 220 self.identifiers[id] = idx.typedefs[id] 221 for id in idx.macros.keys(): 222 # 223 # macro might be used to override functions or variables 224 # definitions 225 # 226 if self.variables.has_key(id): 227 continue 228 if self.functions.has_key(id): 229 continue 230 if self.enums.has_key(id): 231 continue 232 if self.macros.has_key(id): 233 print "macro %s from %s redeclared in %s" % ( 234 id, self.macros[id].module, idx.macros[id].module) 235 else: 236 self.macros[id] = idx.macros[id] 237 self.identifiers[id] = idx.macros[id] 238 for id in idx.enums.keys(): 239 if self.enums.has_key(id): 240 print "enum %s from %s redeclared in %s" % ( 241 id, self.enums[id].module, idx.enums[id].module) 242 else: 243 self.enums[id] = idx.enums[id] 244 self.identifiers[id] = idx.enums[id] 245 246 def merge_public(self, idx): 247 for id in idx.functions.keys(): 248 if self.functions.has_key(id): 249 up = idx.functions[id] 250 self.functions[id].update(None, up.type, up.info, up.extra) 251 # else: 252 # print "Function %s from %s is not declared in headers" % ( 253 # id, idx.functions[id].module) 254 # TODO: do the same for variables. 
255 256 def analyze_dict(self, type, dict): 257 count = 0 258 public = 0 259 for name in dict.keys(): 260 id = dict[name] 261 count = count + 1 262 if id.static == 0: 263 public = public + 1 264 if count != public: 265 print " %d %s , %d public" % (count, type, public) 266 elif count != 0: 267 print " %d public %s" % (count, type) 268 269 270 def analyze(self): 271 self.analyze_dict("functions", self.functions) 272 self.analyze_dict("variables", self.variables) 273 self.analyze_dict("structs", self.structs) 274 self.analyze_dict("typedefs", self.typedefs) 275 self.analyze_dict("macros", self.macros) 276 277class CLexer: 278 """A lexer for the C language, tokenize the input by reading and 279 analyzing it line by line""" 280 def __init__(self, input): 281 self.input = input 282 self.tokens = [] 283 self.line = "" 284 self.lineno = 0 285 286 def getline(self): 287 line = '' 288 while line == '': 289 line = self.input.readline() 290 if not line: 291 return None 292 self.lineno = self.lineno + 1 293 line = string.lstrip(line) 294 line = string.rstrip(line) 295 if line == '': 296 continue 297 while line[-1] == '\\': 298 line = line[:-1] 299 n = self.input.readline() 300 self.lineno = self.lineno + 1 301 n = string.lstrip(n) 302 n = string.rstrip(n) 303 if not n: 304 break 305 else: 306 line = line + n 307 return line 308 309 def getlineno(self): 310 return self.lineno 311 312 def push(self, token): 313 self.tokens.insert(0, token); 314 315 def debug(self): 316 print "Last token: ", self.last 317 print "Token queue: ", self.tokens 318 print "Line %d end: " % (self.lineno), self.line 319 320 def token(self): 321 while self.tokens == []: 322 if self.line == "": 323 line = self.getline() 324 else: 325 line = self.line 326 self.line = "" 327 if line == None: 328 return None 329 330 if line[0] == '#': 331 self.tokens = map((lambda x: ('preproc', x)), 332 string.split(line)) 333 break; 334 l = len(line) 335 if line[0] == '"' or line[0] == "'": 336 end = line[0] 337 line = 
line[1:] 338 found = 0 339 tok = "" 340 while found == 0: 341 i = 0 342 l = len(line) 343 while i < l: 344 if line[i] == end: 345 self.line = line[i+1:] 346 line = line[:i] 347 l = i 348 found = 1 349 break 350 if line[i] == '\\': 351 i = i + 1 352 i = i + 1 353 tok = tok + line 354 if found == 0: 355 line = self.getline() 356 if line == None: 357 return None 358 self.last = ('string', tok) 359 return self.last 360 361 if l >= 2 and line[0] == '/' and line[1] == '*': 362 line = line[2:] 363 found = 0 364 tok = "" 365 while found == 0: 366 i = 0 367 l = len(line) 368 while i < l: 369 if line[i] == '*' and i+1 < l and line[i+1] == '/': 370 self.line = line[i+2:] 371 line = line[:i-1] 372 l = i 373 found = 1 374 break 375 i = i + 1 376 if tok != "": 377 tok = tok + "\n" 378 tok = tok + line 379 if found == 0: 380 line = self.getline() 381 if line == None: 382 return None 383 self.last = ('comment', tok) 384 return self.last 385 if l >= 2 and line[0] == '/' and line[1] == '/': 386 line = line[2:] 387 self.last = ('comment', line) 388 return self.last 389 i = 0 390 while i < l: 391 if line[i] == '/' and i+1 < l and line[i+1] == '/': 392 self.line = line[i:] 393 line = line[:i] 394 break 395 if line[i] == '/' and i+1 < l and line[i+1] == '*': 396 self.line = line[i:] 397 line = line[:i] 398 break 399 if line[i] == '"' or line[i] == "'": 400 self.line = line[i:] 401 line = line[:i] 402 break 403 i = i + 1 404 l = len(line) 405 i = 0 406 while i < l: 407 if line[i] == ' ' or line[i] == '\t': 408 i = i + 1 409 continue 410 o = ord(line[i]) 411 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 412 (o >= 48 and o <= 57): 413 s = i 414 while i < l: 415 o = ord(line[i]) 416 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 417 (o >= 48 and o <= 57) or string.find( 418 " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1: 419 i = i + 1 420 else: 421 break 422 self.tokens.append(('name', line[s:i])) 423 continue 424 if string.find("(){}:;,[]", line[i]) != -1: 425# if line[i] == 
'(' or line[i] == ')' or line[i] == '{' or \ 426# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ 427# line[i] == ',' or line[i] == '[' or line[i] == ']': 428 self.tokens.append(('sep', line[i])) 429 i = i + 1 430 continue 431 if string.find("+-*><=/%&!|.", line[i]) != -1: 432# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ 433# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ 434# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ 435# line[i] == '!' or line[i] == '|' or line[i] == '.': 436 if line[i] == '.' and i + 2 < l and \ 437 line[i+1] == '.' and line[i+2] == '.': 438 self.tokens.append(('name', '...')) 439 i = i + 3 440 continue 441 442 j = i + 1 443 if j < l and ( 444 string.find("+-*><=/%&!|", line[j]) != -1): 445# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ 446# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ 447# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ 448# line[j] == '!' or line[j] == '|'): 449 self.tokens.append(('op', line[i:j+1])) 450 i = j + 1 451 else: 452 self.tokens.append(('op', line[i])) 453 i = i + 1 454 continue 455 s = i 456 while i < l: 457 o = ord(line[i]) 458 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ 459 (o >= 48 and o <= 57) or ( 460 string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1): 461# line[i] != ' ' and line[i] != '\t' and 462# line[i] != '(' and line[i] != ')' and 463# line[i] != '{' and line[i] != '}' and 464# line[i] != ':' and line[i] != ';' and 465# line[i] != ',' and line[i] != '+' and 466# line[i] != '-' and line[i] != '*' and 467# line[i] != '/' and line[i] != '%' and 468# line[i] != '&' and line[i] != '!' 
and 469# line[i] != '|' and line[i] != '[' and 470# line[i] != ']' and line[i] != '=' and 471# line[i] != '*' and line[i] != '>' and 472# line[i] != '<'): 473 i = i + 1 474 else: 475 break 476 self.tokens.append(('name', line[s:i])) 477 478 tok = self.tokens[0] 479 self.tokens = self.tokens[1:] 480 self.last = tok 481 return tok 482 483class CParser: 484 """The C module parser""" 485 def __init__(self, filename, idx = None): 486 self.filename = filename 487 if len(filename) > 2 and filename[-2:] == '.h': 488 self.is_header = 1 489 else: 490 self.is_header = 0 491 self.input = open(filename) 492 self.lexer = CLexer(self.input) 493 if idx == None: 494 self.index = index() 495 else: 496 self.index = idx 497 self.top_comment = "" 498 self.last_comment = "" 499 self.comment = None 500 self.collect_ref = 0 501 502 def collect_references(self): 503 self.collect_ref = 1 504 505 def lineno(self): 506 return self.lexer.getlineno() 507 508 def index_add(self, name, module, static, type, info=None, extra = None): 509 self.index.add(name, module, static, type, self.lineno(), 510 info, extra) 511 512 def index_add_ref(self, name, module, static, type, info=None, 513 extra = None): 514 self.index.add_ref(name, module, static, type, self.lineno(), 515 info, extra) 516 517 def error(self, msg, token=-1): 518 print "Parse Error: " + msg 519 if token != -1: 520 print "Got token ", token 521 self.lexer.debug() 522 sys.exit(1) 523 524 def debug(self, msg, token=-1): 525 print "Debug: " + msg 526 if token != -1: 527 print "Got token ", token 528 self.lexer.debug() 529 530 def parseComment(self, token): 531 if self.top_comment == "": 532 self.top_comment = token[1] 533 if self.comment == None or token[1][0] == '*': 534 self.comment = token[1]; 535 else: 536 self.comment = self.comment + token[1] 537 token = self.lexer.token() 538 return token 539 540 # 541 # Parse a comment block associate to a macro 542 # 543 def parseMacroComment(self, name, quiet = 0): 544 if name[0:2] == '__': 545 
quiet = 1 546 547 args = [] 548 desc = "" 549 550 if self.comment == None: 551 if not quiet: 552 print "Missing comment for macro %s" % (name) 553 return((args, desc)) 554 if self.comment[0] != '*': 555 if not quiet: 556 print "Missing * in macro comment for %s" % (name) 557 return((args, desc)) 558 lines = string.split(self.comment, '\n') 559 if lines[0] == '*': 560 del lines[0] 561 if lines[0] != "* %s:" % (name): 562 if not quiet: 563 print "Misformatted macro comment for %s" % (name) 564 print " Expecting '* %s:' got '%s'" % (name, lines[0]) 565 return((args, desc)) 566 del lines[0] 567 while lines[0] == '*': 568 del lines[0] 569 while len(lines) > 0 and lines[0][0:3] == '* @': 570 l = lines[0][3:] 571 try: 572 (arg, desc) = string.split(l, ':', 1) 573 desc=string.strip(desc) 574 arg=string.strip(arg) 575 except: 576 if not quiet: 577 print "Misformatted macro comment for %s" % (name) 578 print " problem with '%s'" % (lines[0]) 579 del lines[0] 580 continue 581 del lines[0] 582 l = string.strip(lines[0]) 583 while len(l) > 2 and l[0:3] != '* @': 584 while l[0] == '*': 585 l = l[1:] 586 desc = desc + ' ' + string.strip(l) 587 del lines[0] 588 if len(lines) == 0: 589 break 590 l = lines[0] 591 args.append((arg, desc)) 592 while len(lines) > 0 and lines[0] == '*': 593 del lines[0] 594 desc = "" 595 while len(lines) > 0: 596 l = lines[0] 597 while len(l) > 0 and l[0] == '*': 598 l = l[1:] 599 l = string.strip(l) 600 desc = desc + " " + l 601 del lines[0] 602 603 desc = string.strip(desc) 604 605 if quiet == 0: 606 if desc == "": 607 print "Macro comment for %s lack description of the macro" % (name) 608 609 return((args, desc)) 610 611 # 612 # Parse a comment block and merge the informations found in the 613 # parameters descriptions, finally returns a block as complete 614 # as possible 615 # 616 def mergeFunctionComment(self, name, description, quiet = 0): 617 if name == 'main': 618 quiet = 1 619 if name[0:2] == '__': 620 quiet = 1 621 622 (ret, args) = 
description 623 desc = "" 624 retdesc = "" 625 626 if self.comment == None: 627 if not quiet: 628 print "Missing comment for function %s" % (name) 629 return(((ret[0], retdesc), args, desc)) 630 if self.comment[0] != '*': 631 if not quiet: 632 print "Missing * in function comment for %s" % (name) 633 return(((ret[0], retdesc), args, desc)) 634 lines = string.split(self.comment, '\n') 635 if lines[0] == '*': 636 del lines[0] 637 if lines[0] != "* %s:" % (name): 638 if not quiet: 639 print "Misformatted function comment for %s" % (name) 640 print " Expecting '* %s:' got '%s'" % (name, lines[0]) 641 return(((ret[0], retdesc), args, desc)) 642 del lines[0] 643 while lines[0] == '*': 644 del lines[0] 645 nbargs = len(args) 646 while len(lines) > 0 and lines[0][0:3] == '* @': 647 l = lines[0][3:] 648 try: 649 (arg, desc) = string.split(l, ':', 1) 650 desc=string.strip(desc) 651 arg=string.strip(arg) 652 except: 653 if not quiet: 654 print "Misformatted function comment for %s" % (name) 655 print " problem with '%s'" % (lines[0]) 656 del lines[0] 657 continue 658 del lines[0] 659 l = string.strip(lines[0]) 660 while len(l) > 2 and l[0:3] != '* @': 661 while l[0] == '*': 662 l = l[1:] 663 desc = desc + ' ' + string.strip(l) 664 del lines[0] 665 if len(lines) == 0: 666 break 667 l = lines[0] 668 i = 0 669 while i < nbargs: 670 if args[i][1] == arg: 671 args[i] = (args[i][0], arg, desc) 672 break; 673 i = i + 1 674 if i >= nbargs: 675 if not quiet: 676 print "Uname to find arg %s from function comment for %s" % ( 677 arg, name) 678 while len(lines) > 0 and lines[0] == '*': 679 del lines[0] 680 desc = "" 681 while len(lines) > 0: 682 l = lines[0] 683 while len(l) > 0 and l[0] == '*': 684 l = l[1:] 685 l = string.strip(l) 686 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": 687 try: 688 l = string.split(l, ' ', 1)[1] 689 except: 690 l = "" 691 retdesc = string.strip(l) 692 del lines[0] 693 while len(lines) > 0: 694 l = lines[0] 695 while len(l) > 0 and l[0] == '*': 
696 l = l[1:] 697 l = string.strip(l) 698 retdesc = retdesc + " " + l 699 del lines[0] 700 else: 701 desc = desc + " " + l 702 del lines[0] 703 704 retdesc = string.strip(retdesc) 705 desc = string.strip(desc) 706 707 if quiet == 0: 708 # 709 # report missing comments 710 # 711 i = 0 712 while i < nbargs: 713 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None: 714 print "Function comment for %s lack description of arg %s" % (name, args[i][1]) 715 i = i + 1 716 if retdesc == "" and ret[0] != "void": 717 print "Function comment for %s lack description of return value" % (name) 718 if desc == "": 719 print "Function comment for %s lack description of the function" % (name) 720 721 722 return(((ret[0], retdesc), args, desc)) 723 724 def parsePreproc(self, token): 725 name = token[1] 726 if name == "#include": 727 token = self.lexer.token() 728 if token == None: 729 return None 730 if token[0] == 'preproc': 731 self.index_add(token[1], self.filename, not self.is_header, 732 "include") 733 return self.lexer.token() 734 return token 735 if name == "#define": 736 token = self.lexer.token() 737 if token == None: 738 return None 739 if token[0] == 'preproc': 740 # TODO macros with arguments 741 name = token[1] 742 lst = [] 743 token = self.lexer.token() 744 while token != None and token[0] == 'preproc' and \ 745 token[1][0] != '#': 746 lst.append(token[1]) 747 token = self.lexer.token() 748 try: 749 name = string.split(name, '(') [0] 750 except: 751 pass 752 info = self.parseMacroComment(name, not self.is_header) 753 self.index_add(name, self.filename, not self.is_header, 754 "macro", info) 755 return token 756 token = self.lexer.token() 757 while token != None and token[0] == 'preproc' and \ 758 token[1][0] != '#': 759 token = self.lexer.token() 760 return token 761 762 # 763 # token acquisition on top of the lexer, it handle internally 764 # preprocessor and comments since they are logically not part of 765 # the program structure. 
766 # 767 def token(self): 768 global ignored_words 769 770 token = self.lexer.token() 771 while token != None: 772 if token[0] == 'comment': 773 token = self.parseComment(token) 774 continue 775 elif token[0] == 'preproc': 776 token = self.parsePreproc(token) 777 continue 778 elif token[0] == "name" and ignored_words.has_key(token[1]): 779 (n, info) = ignored_words[token[1]] 780 i = 0 781 while i < n: 782 token = self.lexer.token() 783 i = i + 1 784 token = self.lexer.token() 785 continue 786 else: 787 #print "=> ", token 788 return token 789 return None 790 791 # 792 # Parse a typedef, it records the type and its name. 793 # 794 def parseTypedef(self, token): 795 if token == None: 796 return None 797 token = self.parseType(token) 798 if token == None: 799 self.error("parsing typedef") 800 return None 801 base_type = self.type 802 type = base_type 803 #self.debug("end typedef type", token) 804 while token != None: 805 if token[0] == "name": 806 name = token[1] 807 signature = self.signature 808 if signature != None: 809 type = string.split(type, '(')[0] 810 d = self.mergeFunctionComment(name, 811 ((type, None), signature), 1) 812 self.index_add(name, self.filename, not self.is_header, 813 "functype", d) 814 else: 815 if base_type == "struct": 816 self.index_add(name, self.filename, not self.is_header, 817 "struct", type) 818 base_type = "struct " + name 819 else: 820 self.index_add(name, self.filename, not self.is_header, 821 "typedef", type) 822 token = self.token() 823 else: 824 self.error("parsing typedef: expecting a name") 825 return token 826 #self.debug("end typedef", token) 827 if token != None and token[0] == 'sep' and token[1] == ',': 828 type = base_type 829 token = self.token() 830 while token != None and token[0] == "op": 831 type = type + token[1] 832 token = self.token() 833 elif token != None and token[0] == 'sep' and token[1] == ';': 834 break; 835 elif token != None and token[0] == 'name': 836 type = base_type 837 continue; 838 else: 839 
self.error("parsing typedef: expecting ';'", token) 840 return token 841 token = self.token() 842 return token 843 844 # 845 # Parse a C code block, used for functions it parse till 846 # the balancing } included 847 # 848 def parseBlock(self, token): 849 while token != None: 850 if token[0] == "sep" and token[1] == "{": 851 token = self.token() 852 token = self.parseBlock(token) 853 elif token[0] == "sep" and token[1] == "}": 854 self.comment = None 855 token = self.token() 856 return token 857 else: 858 if self.collect_ref == 1: 859 oldtok = token 860 token = self.token() 861 if oldtok[0] == "name" and oldtok[1][0:3] == "xml": 862 if token[0] == "sep" and token[1] == "(": 863 self.index_add_ref(oldtok[1], self.filename, 864 0, "function") 865 token = self.token() 866 elif token[0] == "name": 867 token = self.token() 868 if token[0] == "sep" and (token[1] == ";" or 869 token[1] == "," or token[1] == "="): 870 self.index_add_ref(oldtok[1], self.filename, 871 0, "type") 872 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": 873 self.index_add_ref(oldtok[1], self.filename, 874 0, "typedef") 875 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": 876 self.index_add_ref(oldtok[1], self.filename, 877 0, "typedef") 878 879 else: 880 token = self.token() 881 return token 882 883 # 884 # Parse a C struct definition till the balancing } 885 # 886 def parseStruct(self, token): 887 fields = [] 888 #self.debug("start parseStruct", token) 889 while token != None: 890 if token[0] == "sep" and token[1] == "{": 891 token = self.token() 892 token = self.parseTypeBlock(token) 893 elif token[0] == "sep" and token[1] == "}": 894 self.struct_fields = fields 895 #self.debug("end parseStruct", token) 896 #print fields 897 token = self.token() 898 return token 899 else: 900 base_type = self.type 901 #self.debug("before parseType", token) 902 token = self.parseType(token) 903 #self.debug("after parseType", token) 904 if token != None and token[0] == "name": 905 fname = token[1] 
906 token = self.token() 907 if token[0] == "sep" and token[1] == ";": 908 self.comment = None 909 token = self.token() 910 fields.append((self.type, fname, self.comment)) 911 self.comment = None 912 else: 913 self.error("parseStruct: expecting ;", token) 914 elif token != None and token[0] == "sep" and token[1] == "{": 915 token = self.token() 916 token = self.parseTypeBlock(token) 917 if token != None and token[0] == "name": 918 token = self.token() 919 if token != None and token[0] == "sep" and token[1] == ";": 920 token = self.token() 921 else: 922 self.error("parseStruct: expecting ;", token) 923 else: 924 self.error("parseStruct: name", token) 925 token = self.token() 926 self.type = base_type; 927 self.struct_fields = fields 928 #self.debug("end parseStruct", token) 929 #print fields 930 return token 931 932 # 933 # Parse a C enum block, parse till the balancing } 934 # 935 def parseEnumBlock(self, token): 936 self.enums = [] 937 name = None 938 self.comment = None 939 comment = "" 940 value = "0" 941 while token != None: 942 if token[0] == "sep" and token[1] == "{": 943 token = self.token() 944 token = self.parseTypeBlock(token) 945 elif token[0] == "sep" and token[1] == "}": 946 if name != None: 947 if self.comment != None: 948 comment = self.comment 949 self.comment = None 950 self.enums.append((name, value, comment)) 951 token = self.token() 952 return token 953 elif token[0] == "name": 954 if name != None: 955 if self.comment != None: 956 comment = string.strip(self.comment) 957 self.comment = None 958 self.enums.append((name, value, comment)) 959 name = token[1] 960 comment = "" 961 token = self.token() 962 if token[0] == "op" and token[1][0] == "=": 963 value = "" 964 if len(token[1]) > 1: 965 value = token[1][1:] 966 token = self.token() 967 while token[0] != "sep" or (token[1] != ',' and 968 token[1] != '}'): 969 value = value + token[1] 970 token = self.token() 971 else: 972 try: 973 value = "%d" % (int(value) + 1) 974 except: 975 print "Failed to 
compute value of enum %s" % (name) 976 value="" 977 if token[0] == "sep" and token[1] == ",": 978 token = self.token() 979 else: 980 token = self.token() 981 return token 982 983 # 984 # Parse a C definition block, used for structs it parse till 985 # the balancing } 986 # 987 def parseTypeBlock(self, token): 988 while token != None: 989 if token[0] == "sep" and token[1] == "{": 990 token = self.token() 991 token = self.parseTypeBlock(token) 992 elif token[0] == "sep" and token[1] == "}": 993 token = self.token() 994 return token 995 else: 996 token = self.token() 997 return token 998 999 # 1000 # Parse a type: the fact that the type name can either occur after 1001 # the definition or within the definition makes it a little harder 1002 # if inside, the name token is pushed back before returning 1003 # 1004 def parseType(self, token): 1005 self.type = "" 1006 self.struct_fields = [] 1007 self.signature = None 1008 if token == None: 1009 return token 1010 1011 while token[0] == "name" and ( 1012 token[1] == "const" or token[1] == "unsigned"): 1013 if self.type == "": 1014 self.type = token[1] 1015 else: 1016 self.type = self.type + " " + token[1] 1017 token = self.token() 1018 1019 if token[0] == "name" and (token[1] == "long" or token[1] == "short"): 1020 if self.type == "": 1021 self.type = token[1] 1022 else: 1023 self.type = self.type + " " + token[1] 1024 if token[0] == "name" and token[1] == "int": 1025 if self.type == "": 1026 self.type = tmp[1] 1027 else: 1028 self.type = self.type + " " + tmp[1] 1029 1030 elif token[0] == "name" and token[1] == "struct": 1031 if self.type == "": 1032 self.type = token[1] 1033 else: 1034 self.type = self.type + " " + token[1] 1035 token = self.token() 1036 nametok = None 1037 if token[0] == "name": 1038 nametok = token 1039 token = self.token() 1040 if token != None and token[0] == "sep" and token[1] == "{": 1041 token = self.token() 1042 token = self.parseStruct(token) 1043 elif token != None and token[0] == "op" and 
token[1] == "*": 1044 self.type = self.type + " " + nametok[1] + " *" 1045 token = self.token() 1046 while token != None and token[0] == "op" and token[1] == "*": 1047 self.type = self.type + " *" 1048 token = self.token() 1049 if token[0] == "name": 1050 nametok = token 1051 token = self.token() 1052 else: 1053 self.error("struct : expecting name", token) 1054 return token 1055 elif token != None and token[0] == "name" and nametok != None: 1056 self.type = self.type + " " + nametok[1] 1057 return token 1058 1059 if nametok != None: 1060 self.lexer.push(token) 1061 token = nametok 1062 return token 1063 1064 elif token[0] == "name" and token[1] == "enum": 1065 if self.type == "": 1066 self.type = token[1] 1067 else: 1068 self.type = self.type + " " + token[1] 1069 self.enums = [] 1070 token = self.token() 1071 if token != None and token[0] == "sep" and token[1] == "{": 1072 token = self.token() 1073 token = self.parseEnumBlock(token) 1074 else: 1075 self.error("parsing enum: expecting '{'", token) 1076 enum_type = None 1077 if token != None and token[0] != "name": 1078 self.lexer.push(token) 1079 token = ("name", "enum") 1080 else: 1081 enum_type = token[1] 1082 for enum in self.enums: 1083 self.index_add(enum[0], self.filename, 1084 not self.is_header, "enum", 1085 (enum[1], enum[2], enum_type)) 1086 return token 1087 1088 elif token[0] == "name": 1089 if self.type == "": 1090 self.type = token[1] 1091 else: 1092 self.type = self.type + " " + token[1] 1093 else: 1094 self.error("parsing type %s: expecting a name" % (self.type), 1095 token) 1096 return token 1097 token = self.token() 1098 while token != None and (token[0] == "op" or 1099 token[0] == "name" and token[1] == "const"): 1100 self.type = self.type + " " + token[1] 1101 token = self.token() 1102 1103 # 1104 # if there is a parenthesis here, this means a function type 1105 # 1106 if token != None and token[0] == "sep" and token[1] == '(': 1107 self.type = self.type + token[1] 1108 token = self.token() 1109 
while token != None and token[0] == "op" and token[1] == '*': 1110 self.type = self.type + token[1] 1111 token = self.token() 1112 if token == None or token[0] != "name" : 1113 self.error("parsing function type, name expected", token); 1114 return token 1115 self.type = self.type + token[1] 1116 nametok = token 1117 token = self.token() 1118 if token != None and token[0] == "sep" and token[1] == ')': 1119 self.type = self.type + token[1] 1120 token = self.token() 1121 if token != None and token[0] == "sep" and token[1] == '(': 1122 token = self.token() 1123 type = self.type; 1124 token = self.parseSignature(token); 1125 self.type = type; 1126 else: 1127 self.error("parsing function type, '(' expected", token); 1128 return token 1129 else: 1130 self.error("parsing function type, ')' expected", token); 1131 return token 1132 self.lexer.push(token) 1133 token = nametok 1134 return token 1135 1136 # 1137 # do some lookahead for arrays 1138 # 1139 if token != None and token[0] == "name": 1140 nametok = token 1141 token = self.token() 1142 if token != None and token[0] == "sep" and token[1] == '[': 1143 self.type = self.type + nametok[1] 1144 while token != None and token[0] == "sep" and token[1] == '[': 1145 self.type = self.type + token[1] 1146 token = self.token() 1147 while token != None and token[0] != 'sep' and \ 1148 token[1] != ']' and token[1] != ';': 1149 self.type = self.type + token[1] 1150 token = self.token() 1151 if token != None and token[0] == 'sep' and token[1] == ']': 1152 self.type = self.type + token[1] 1153 token = self.token() 1154 else: 1155 self.error("parsing array type, ']' expected", token); 1156 return token 1157 elif token != None and token[0] == "sep" and token[1] == ':': 1158 # remove :12 in case it's a limited int size 1159 token = self.token() 1160 token = self.token() 1161 self.lexer.push(token) 1162 token = nametok 1163 1164 return token 1165 1166 # 1167 # Parse a signature: '(' has been parsed and we scan the type definition 1168 # up 
#
# Parse a signature: '(' has been parsed and we scan the type definition
#    up to the ')' included
#
def parseSignature(self, token):
    """Parse a parameter list and store it in self.signature.

    The opening '(' has already been consumed; token is the first token
    after it.  Tokens are indexed as token[0] (kind, e.g. "sep" or
    "name") and token[1] (text).  Every parameter is recorded as a
    (type, name, None) tuple where type is whatever self.parseType()
    left in self.type.  A parameter given by its type only gets a None
    name, except for the "..." type which is named "...".

    Returns the token following the closing ')', or None if the token
    stream ends before it.  When the stream ends early the parameters
    collected so far are still stored in self.signature.
    """
    signature = []
    if token is not None and token[0] == "sep" and token[1] == ')':
        # Empty parameter list: "()".
        self.signature = []
        return self.token()

    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        if token[0] == "name":
            signature.append((self.type, token[1], None))
            token = self.token()
        elif token[0] == "sep" and token[1] in (',', ')'):
            # Only the type was provided.  This used to be recorded only
            # in front of ')', so "f(int, char *s)" lost its first
            # parameter; the ',' case is now recorded the same way.
            if self.type == "...":
                signature.append((self.type, "...", None))
            else:
                signature.append((self.type, None, None))
        # Any other token is handed back to parseType() on the next turn.
        if token is not None and token[0] == "sep":
            if token[1] == ',':
                token = self.token()
                continue
            elif token[1] == ')':
                token = self.token()
                break
    self.signature = signature
    return token
#
# Parse a global definition, be it a type, variable or function
# the extern "C" blocks are a bit nasty and require it to recurse.
#
def parseGlobal(self, token):
    """Parse one top-level declaration starting at token.

    token must not be None.  Handled forms, as far as this method goes:
      - extern "C" { ... }: every enclosed definition is parsed by
        recursing; extern followed by any other string returns at once;
      - static: remembered and passed on to self.index_add();
      - typedef: delegated to self.parseTypedef();
      - variables (optionally arrays, initialised, or comma separated)
        and function prototypes/definitions, which are registered with
        self.index_add().

    Problems are reported through self.error().  Returns the first token
    that was not consumed, or None at the end of the token stream.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    # Recurse on each definition up to the closing '}'.
                    while token is not None and (token[0] != 'sep' or
                                                 token[1] != "}"):
                        if token[0] != 'name':
                            self.error(
                                "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                        token = self.parseGlobal(token)
                    token = self.token()
                    return token
            else:
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    cur_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep":
            if token[1] == "[":
                # Array: glue everything up to the ';' onto the type.
                cur_type = cur_type + token[1]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                                             token[1] != ";"):
                    cur_type = cur_type + token[1]
                    token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            # The stream may end right after '='; without the None test
            # the subscript below raised TypeError instead of reaching
            # the "missing ';' or ','" report.
            if token is not None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                        (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
                                                       token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                self.comment = None
                token = self.token()
                if cur_type == "struct":
                    self.index_add(self.name, self.filename,
                                   not self.is_header, "struct",
                                   self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                                   not self.is_header, "variable", cur_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # Prototype; the last argument is the literal 1 here,
                    # its meaning is defined by mergeFunctionComment().
                    d = self.mergeFunctionComment(self.name,
                            ((cur_type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # Definition: register it, then skip the body.
                    d = self.mergeFunctionComment(self.name,
                            ((cur_type, None), self.signature), static)
                    self.index_add(self.name, self.filename, static,
                                   "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # Several variables declared at once: register this one
                # and restart from the base type for the next name.
                self.comment = None
                self.index_add(self.name, self.filename, static,
                               "variable", cur_type)
                cur_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    cur_type = cur_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break
        elif token is not None and token[0] == "op" and token[1] != "=":
            # No branch above consumes such an operator (e.g. the '*' of
            # "int a, *b;"), so looping again would never terminate.
            break

    return token

def parse(self):
    """Parse the whole file and return the index built from it.

    Every top-level token is handed to self.parseGlobal().  A token that
    is not a name is reported through self.error(); parsing then stops
    after one more parseGlobal() call and None is returned, not
    self.index.
    """
    print("Parsing %s" % (self.filename))
    token = self.token()
    while token is not None:
        if token[0] == 'name':
            token = self.parseGlobal(token)
        else:
            self.error("token %s %s unexpected at the top level" % (
                token[0], token[1]))
            token = self.parseGlobal(token)
            # NOTE(review): the caller gets None on this path.
            return
    return self.index
def _sorted_keys(table):
    """Return the keys of table as a new sorted list."""
    keys = list(table.keys())
    keys.sort()
    return keys


class docBuilder:
    """A documentation builder"""

    def __init__(self, name, directories=None, excludes=None):
        """Create an empty builder for the project called name.

        directories lists the directories scanned for *.c and *.h files
        (['.'] when omitted).  excludes lists extra path fragments to
        skip, in addition to the keys of the module-level ignored_files
        table.
        """
        # None replaces the former list literals used as defaults so that
        # no list object is shared between instances.
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}   # path of each .c file -> its index (None until scanned)
        self.headers = {}   # path of each .h file -> its index (None until scanned)
        self.idx = index()

    def analyze(self):
        """Print a one-line summary and run the analysis of the index."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge it into self.idx."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge it with merge_public()."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _is_excluded(self, path):
        """Tell whether path contains one of the excluded fragments."""
        for excl in self.excludes:
            # Substring match on the whole path, as before.
            if path.find(excl) != -1:
                return 1
        return 0

    def scan(self):
        """Collect the .c and .h files of all directories, then parse them."""
        for directory in self.directories:
            for path in glob.glob(directory + "/*.c"):
                if not self._is_excluded(path):
                    self.modules[path] = None
            for path in glob.glob(directory + "/*.h"):
                if not self._is_excluded(path):
                    self.headers[path] = None
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of file, without its '.h' suffix if any."""
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element describing the enum value name."""
        ident = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (name,
                     self.modulename_file(ident.module)))
        info = ident.info
        if info is not None:
            if info[0] is not None and info[0] != '':
                # SECURITY NOTE: eval() runs text taken from the parsed
                # sources; only use this tool on trusted code.  When the
                # expression cannot be evaluated the text is kept as is.
                try:
                    val = eval(info[0])
                except Exception:
                    val = info[0]
                output.write(" value='%s'" % (val))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % info[2])
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for name, with its info and args."""
        ident = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(ident.module)))
        if ident.info is not None:
            try:
                (args, desc) = ident.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                for arg in args:
                    # Separate names: the loop used to rebind the name
                    # parameter of this method.
                    (arg_name, arg_desc) = arg
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except Exception:
                # Still best effort, but no longer silent.
                print("Failed to serialize macro %s" % (name))
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write a <struct> element (with its fields when they are known)
        for a typedef whose info starts with 'struct ', and a <typedef>
        element otherwise."""
        ident = self.idx.typedefs[name]
        if ident.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(ident.module), ident.info))
            name = ident.info[7:]
            fields = None
            if name in self.idx.structs:
                fields = self.idx.structs[name].info
            if isinstance(fields, (tuple, list)):
                output.write(">\n")
                try:
                    for field in fields:
                        desc = field[2]
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1], field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (name))
                output.write(" </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(ident.module), ident.info))

    def serialize_variable(self, output, name):
        """Write the <variable> element for name, with its type if known."""
        ident = self.idx.variables[name]
        if ident.info is not None:
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                         name, self.modulename_file(ident.module), ident.info))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                         name, self.modulename_file(ident.module)))

    def serialize_function(self, output, name):
        """Write the element for the function name (tag taken from its
        type attribute) with its description, return type and arguments.
        A failure while writing the details is reported on stdout and the
        element is closed anyway."""
        ident = self.idx.functions[name]
        output.write(" <%s name='%s' file='%s'>\n" % (ident.type, name,
                     self.modulename_file(ident.module)))
        try:
            (ret, params, desc) = ident.info
            output.write(" <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                                 ret[0], escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
        except Exception:
            # Same text as the former two-item print statement.
            print("Failed to save function %s info: " % name
                  + " " + repr(ident.info))
        output.write(" </%s>\n" % (ident.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing, sorted, every symbol found
        in the index of the header file."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        header_idx = self.headers[file]
        ids = []
        for table in (header_idx.functions, header_idx.variables,
                      header_idx.macros, header_idx.typedefs,
                      header_idx.structs, header_idx.enums):
            ids = ids + list(table.keys())
        ids.sort()
        for symbol in ids:
            output.write(" <exports symbol='%s'/>\n" % (symbol))
        output.write(" </file>\n")

    def serialize(self, filename = None):
        """Save the XML description of the API.

        filename defaults to '<name>-api.xml'.  The file is closed even
        when one of the serializers raises.
        """
        if filename is None:
            filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for file in list(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in _sorted_keys(self.idx.macros):
                self.serialize_macro(output, macro)
            for enum in _sorted_keys(self.idx.enums):
                self.serialize_enum(output, enum)
            for typedef in _sorted_keys(self.idx.typedefs):
                self.serialize_typedef(output, typedef)
            for variable in _sorted_keys(self.idx.variables):
                self.serialize_variable(output, variable)
            for function in _sorted_keys(self.idx.functions):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")
        finally:
            output.close()


def rebuild():
    """Rebuild the API description of the project found next to the
    current directory (libxml2 or libxslt, plus libexslt when present).

    Returns the main builder, or None when no known project is found.
    """
    builder = None
    if glob.glob("../parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c"):
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder

#
# for debugging the parser
#
def parse(filename):
    """Parse a single file and return whatever CParser.parse() returns."""
    parser = CParser(filename)
    idx = parser.parse()
    return idx

if __name__ == "__main__":
    rebuild()