apibuild.py revision d433046ae52b0488a9fad77d2045c0139b8cada8
#!/usr/bin/python -u
#
# This is the API builder, it parses the C sources and build the
# API formal description in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#
# NOTE: the code below only uses constructs that are valid on both
# Python 2 and Python 3 (single-argument print(), str methods, "in").
#
import os
import sys
import string
import glob

#
# C parser analysis code
#
ignored_files = {
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
}

# name -> (number of following tokens to drop as well, reason)
ignored_words = {
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "__declspec": (3, "Windows keyword"),
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    "X_IN_Y": (5, "macro function builder"),
}


def escape(raw):
    """Return raw with the five XML special characters replaced by entities.

    '&' is replaced first so the entities produced for the other
    characters are not escaped a second time.
    """
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw


class identifier:
    """One named entity (function, macro, typedef, ...) found in a file."""

    def __init__(self, name, module=None, type=None, info=None, extra=None):
        self.name = name
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.static = 0

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module,)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        return r

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_static(self, static):
        self.static = static

    def update(self, module, type=None, info=None, extra=None):
        """Fill in missing module/type and overwrite info/extra when given."""
        if module is not None and self.module is None:
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)


class index:
    """Symbol tables for the identifiers found in one or several files."""

    def __init__(self, name="noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}

    def _tables(self):
        """Map each identifier kind accepted by add() to its table."""
        return {
            "function": self.functions,
            "functype": self.functions,
            "variable": self.variables,
            "include": self.includes,
            "struct": self.structs,
            "enum": self.enums,
            "typedef": self.typedefs,
            "macro": self.macros,
        }

    def add(self, name, module, static, type, info=None, extra=None):
        """Register (or update) an identifier and return it.

        Names starting with '__' are ignored and None is returned.
        """
        if name[0:2] == '__':
            return None
        d = self.identifiers.get(name)
        if d is None:
            d = identifier(name, module, type, info, extra)
            self.identifiers[name] = d
        else:
            d.update(module, type, info, extra)

        if static == 1:
            d.set_static(1)

        if type is not None:
            table = self._tables().get(type)
            if table is None:
                print("Unable to register type  %s" % (type,))
            else:
                table[name] = d
        return d

    def _merge_kind(self, theirs, ours, label, overrides_macro=0,
                    shadowed_by=()):
        """Merge one table of another index into the matching local table.

        overrides_macro: drop a local macro of the same name first (a macro
        might be used to override function or variable definitions).
        shadowed_by: local tables whose entries take precedence; names
        found there are skipped silently.
        """
        for name in list(theirs.keys()):
            if overrides_macro and name in self.macros:
                del self.macros[name]
            shadowed = 0
            for table in shadowed_by:
                if name in table:
                    shadowed = 1
                    break
            if shadowed:
                continue
            if name in ours:
                print("%s %s from %s redeclared in %s" % (
                    label, name, ours[name].module, theirs[name].module))
            else:
                ours[name] = theirs[name]
                self.identifiers[name] = theirs[name]

    def merge(self, idx):
        """Merge the identifiers of idx into this index, reporting clashes."""
        self._merge_kind(idx.functions, self.functions, "function", 1)
        self._merge_kind(idx.variables, self.variables, "variable", 1)
        self._merge_kind(idx.structs, self.structs, "struct")
        self._merge_kind(idx.typedefs, self.typedefs, "typedef")
        # enums are merged after macros, so only the enums already known
        # before this call shadow the incoming macros
        self._merge_kind(idx.macros, self.macros, "macro", 0,
                         (self.variables, self.functions, self.enums))
        self._merge_kind(idx.enums, self.enums, "enum")

    def merge_public(self, idx):
        """Update already known functions with the data found in idx."""
        for name in list(idx.functions.keys()):
            if name in self.functions:
                up = idx.functions[name]
                self.functions[name].update(None, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries of a table exist and how many are public."""
        count = 0
        public = 0
        for name in dict.keys():
            count = count + 1
            if dict[name].static == 0:
                public = public + 1
        if count != public:
            print(" %d %s , %d public" % (count, type, public))
        elif count != 0:
            print(" %d public %s" % (count, type))

    def analyze(self):
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)


class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line"""

    _SEPARATORS = "(){}:;,[]"
    _OPERATORS = "+-*><=/%&!|"
    # characters ending a name; note that '.' is not one of them
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        self.last = None    # last token returned, read by debug()

    def getline(self):
        """Return the next non-empty stripped line, or None at end of input.

        Lines ending with a backslash are joined with the following ones.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Push a token back, it will be the next one returned."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token:  %s" % (self.last,))
        print("Token queue:  %s" % (self.tokens,))
        print("Line %d end:  %s" % (self.lineno, self.line))

    def _readString(self, line):
        """Read a string or character literal; line starts at its quote."""
        end = line[0]
        line = line[1:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i + 1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _readBlockComment(self, line):
        """Read a /* */ comment; line is the text following the opening."""
        found = 0
        tok = ""
        while found == 0:
            end = line.find('*/')
            if end != -1:
                self.line = line[end + 2:]
                # keep everything before the terminator (this used to
                # drop the last character of the comment as well)
                line = line[:end]
                found = 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _scanName(self, line, i):
        """Return the index following the name starting at line[i]."""
        l = len(line)
        while i < l and line[i] not in self._NAME_STOP:
            i = i + 1
        return i

    def _tokenizeCode(self, line):
        """Queue the tokens of line up to the first comment or literal.

        The unprocessed remainder is kept in self.line.
        """
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == '"' or c == "'" or \
               (c == '/' and line[i + 1:i + 2] in ('/', '*')):
                self.line = line[i:]
                line = line[:i]
                break
            i = i + 1

        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._OPERATORS or c == '.':
                if line[i:i + 3] == '...':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue
                j = i + 1
                if j < l and line[j] in self._OPERATORS:
                    # two characters operator
                    self.tokens.append(('op', line[i:j + 1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # anything else starts a name
            s = i
            i = self._scanName(line, i)
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next (kind, text) token or None at end of input.

        kind is one of 'preproc', 'string', 'comment', 'name', 'sep', 'op'.
        """
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._readString(line)
            if line[0:2] == '/*':
                return self._readBlockComment(line[2:])
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenizeCode(line)

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok


class CParser:
    """The C module parser"""

    def __init__(self, filename, idx=None):
        self.filename = filename
        if len(filename) > 2 and filename[-2:] == '.h':
            self.is_header = 1
        else:
            self.is_header = 0
        self.input = open(filename)     # closed at the end of parse()
        self.lexer = CLexer(self.input)
        if idx is None:
            self.index = index()
        else:
            self.index = idx
        self.top_comment = ""
        self.last_comment = ""
        self.comment = None

    def lineno(self):
        return self.lexer.getlineno()

    def error(self, msg, token=-1):
        """Report a parse error and exit the program."""
        print("Parse Error: " + msg)
        if token != -1:
            print("Got token  %s" % (token,))
        self.lexer.debug()
        sys.exit(1)

    def debug(self, msg, token=-1):
        print("Debug: " + msg)
        if token != -1:
            print("Got token  %s" % (token,))
        self.lexer.debug()

    def parseComment(self, token):
        """Record a comment token and return the token following it."""
        if self.top_comment == "":
            self.top_comment = token[1]
        # a comment starting with '*' starts a new documentation block
        if self.comment is None or token[1][:1] == '*':
            self.comment = token[1]
        else:
            self.comment = self.comment + token[1]
        return self.lexer.token()

    def _commentBody(self, name, kind, quiet):
        """Return the lines of the current comment following its
        '* name:' header, or None (after reporting unless quiet) when
        the comment is missing or misformatted."""
        if self.comment is None:
            if not quiet:
                print("Missing comment for %s %s" % (kind, name))
            return None
        if self.comment[:1] != '*':
            if not quiet:
                print("Missing * in %s comment for %s" % (kind, name))
            return None
        lines = self.comment.split('\n')
        if lines[0] == '*':
            del lines[0]
        if len(lines) == 0 or lines[0] != "* %s:" % (name):
            if not quiet:
                print("Misformatted %s comment for %s" % (kind, name))
                print(" Expecting '* %s:' got '%s'" % (
                    name, (lines or [""])[0]))
            return None
        del lines[0]
        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
        return lines

    def _commentArgs(self, lines, name, kind, quiet):
        """Consume the leading '* @arg: description' entries of lines
        and yield (arg, description) for each well formed one."""
        while len(lines) > 0 and lines[0][0:3] == '* @':
            entry = lines[0]
            del lines[0]
            try:
                (arg, desc) = entry[3:].split(':', 1)
            except ValueError:
                if not quiet:
                    print("Misformatted %s comment for %s" % (kind, name))
                    print(" problem with '%s'" % (entry,))
                continue
            arg = arg.strip()
            desc = desc.strip()
            if len(lines) > 0:
                # continuation lines of the description
                l = lines[0].strip()
                while len(l) > 2 and l[0:3] != '* @':
                    desc = desc + ' ' + l.lstrip('*').strip()
                    del lines[0]
                    if len(lines) == 0:
                        break
                    l = lines[0]
            yield (arg, desc)

    #
    # Parse a comment block associate to a macro
    #
    def parseMacroComment(self, name, quiet=0):
        """Return (args, desc) extracted from the current comment, args
        being a list of (name, description)."""
        if name[0:2] == '__':
            quiet = 1

        args = []
        desc = ""

        lines = self._commentBody(name, "macro", quiet)
        if lines is None:
            return (args, desc)
        args = list(self._commentArgs(lines, name, "macro", quiet))
        for l in lines:
            desc = desc + " " + l.lstrip('*').strip()
        desc = desc.strip()

        if not quiet:
            if desc == "":
                print("Macro comment for %s lack description of the macro"
                      % (name))

        return (args, desc)

    #
    # Parse a comment block and merge the informations found in the
    # parameters descriptions, finally returns a block as complete
    # as possible
    #
    def mergeFunctionComment(self, name, description, quiet=0):
        """description is ((return type, None), signature); the signature
        entries are updated in place with the argument descriptions.
        Returns ((return type, return description), signature, desc)."""
        if name == 'main':
            quiet = 1
        if name[0:2] == '__':
            quiet = 1

        (ret, args) = description
        desc = ""
        retdesc = ""

        lines = self._commentBody(name, "function", quiet)
        if lines is None:
            return ((ret[0], retdesc), args, desc)

        nbargs = len(args)
        for (arg, argdesc) in self._commentArgs(lines, name, "function",
                                                quiet):
            i = 0
            while i < nbargs:
                if args[i][1] == arg:
                    args[i] = (args[i][0], arg, argdesc)
                    break
                i = i + 1
            if i >= nbargs and not quiet:
                print("Unable to find arg %s from function comment for %s"
                      % (arg, name))

        while len(lines) > 0 and lines[0] == '*':
            del lines[0]
        while len(lines) > 0:
            l = lines[0].lstrip('*').strip()
            del lines[0]
            if l[0:6] in ("return", "Return"):
                # everything from here on describes the return value
                try:
                    l = l.split(' ', 1)[1]
                except IndexError:
                    l = ""
                retdesc = l.strip()
                for l in lines:
                    retdesc = retdesc + " " + l.lstrip('*').strip()
                del lines[:]
            else:
                desc = desc + " " + l

        retdesc = retdesc.strip()
        desc = desc.strip()

        if not quiet:
            #
            # report missing comments
            #
            for param in args[:nbargs]:
                if param[2] is None and param[0] != "void" and \
                   param[1] is not None:
                    print("Function comment for %s lack description of arg %s"
                          % (name, param[1]))
            if retdesc == "" and ret[0] != "void":
                print("Function comment for %s lack description of return value"
                      % (name))
            if desc == "":
                print("Function comment for %s lack description of the function"
                      % (name))

        return ((ret[0], retdesc), args, desc)

    def parsePreproc(self, token):
        """Handle a preprocessor line, token being its first word, and
        return the first token following it. #include and #define are
        registered in the index, everything else is skipped."""
        name = token[1]
        if name == "#include":
            token = self.lexer.token()
            if token is None:
                return None
            if token[0] == 'preproc':
                self.index.add(token[1], self.filename, not self.is_header,
                               "include")
                return self.lexer.token()
            return token
        if name == "#define":
            token = self.lexer.token()
            if token is None:
                return None
            if token[0] == 'preproc':
                # TODO macros with arguments
                name = token[1]
                token = self.lexer.token()
                while token is not None and token[0] == 'preproc' and \
                      token[1][0] != '#':
                    token = self.lexer.token()
                name = name.split('(')[0]
                info = self.parseMacroComment(name, not self.is_header)
                self.index.add(name, self.filename, not self.is_header,
                               "macro", info)
                return token
        token = self.lexer.token()
        while token is not None and token[0] == 'preproc' and \
              token[1][0] != '#':
            token = self.lexer.token()
        return token

    #
    # token acquisition on top of the lexer, it handle internally
    # preprocessor and comments since they are logically not part of
    # the program structure.
    #
    def token(self):
        token = self.lexer.token()
        while token is not None:
            if token[0] == 'comment':
                token = self.parseComment(token)
            elif token[0] == 'preproc':
                token = self.parsePreproc(token)
            elif token[0] == "name" and token[1] in ignored_words:
                (n, info) = ignored_words[token[1]]
                # drop the n tokens following the ignored word too
                i = 0
                while i < n:
                    token = self.lexer.token()
                    i = i + 1
                token = self.lexer.token()
            else:
                return token
        return None

    #
    # Parse a typedef, it records the type and its name.
    #
    def parseTypedef(self, token):
        if token is None:
            return None
        token = self.parseType(token)
        if token is None:
            self.error("parsing typedef")
            return None
        base_type = self.type
        tdef = base_type
        while token is not None:
            if token[0] == "name":
                name = token[1]
                signature = self.signature
                if signature is not None:
                    tdef = tdef.split('(')[0]
                    d = self.mergeFunctionComment(
                        name, ((tdef, None), signature), 1)
                    self.index.add(name, self.filename, not self.is_header,
                                   "functype", d)
                elif base_type == "struct":
                    self.index.add(name, self.filename, not self.is_header,
                                   "struct", tdef)
                    base_type = "struct " + name
                else:
                    self.index.add(name, self.filename, not self.is_header,
                                   "typedef", tdef)
                token = self.token()
            else:
                self.error("parsing typedef: expecting a name")
                return token
            if token is not None and token[0] == 'sep' and token[1] == ',':
                tdef = base_type
                token = self.token()
                while token is not None and token[0] == "op":
                    tdef = tdef + token[1]
                    token = self.token()
            elif token is not None and token[0] == 'sep' and token[1] == ';':
                break
            elif token is not None and token[0] == 'name':
                tdef = base_type
                continue
            else:
                self.error("parsing typedef: expecting ';'", token)
                return token
        token = self.token()
        return token

    #
    # Parse a C code block, used for functions it parse till
    # the balancing } included
    #
    def parseBlock(self, token):
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.comment = None
                token = self.token()
                return token
            else:
                token = self.token()
        return token

    #
    # Parse a C struct definition till the balancing }
    #
    def parseStruct(self, token):
        # parseType() resets self.type and self.struct_fields for each
        # field, hence the local list and the restoring of self.type
        fields = []
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                self.struct_fields = fields
                token = self.token()
                return token
            else:
                base_type = self.type
                token = self.parseType(token)
                if token is not None and token[0] == "name":
                    fname = token[1]
                    token = self.token()
                    if token is not None and token[0] == "sep" and \
                       token[1] == ";":
                        # only a comment following the field is kept
                        self.comment = None
                        token = self.token()
                        fields.append((self.type, fname, self.comment))
                        self.comment = None
                    else:
                        self.error("parseStruct: expecting ;", token)
                elif token is not None and token[0] == "sep" and \
                     token[1] == "{":
                    token = self.token()
                    token = self.parseTypeBlock(token)
                    if token is not None and token[0] == "name":
                        token = self.token()
                    if token is not None and token[0] == "sep" and \
                       token[1] == ";":
                        token = self.token()
                    else:
                        self.error("parseStruct: expecting ;", token)
                else:
                    self.error("parseStruct: name", token)
                    token = self.token()
                self.type = base_type
        self.struct_fields = fields
        return token

    #
    # Parse a C enum block, parse till the balancing }
    #
    def parseEnumBlock(self, token):
        """Fill self.enums with (name, value, comment) tuples."""
        self.enums = []
        name = None
        self.comment = None
        comment = ""
        # value of the previous enumerator; in C the first one is 0 when
        # it has no initializer (it used to be numbered 1)
        value = "-1"
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                if name is not None:
                    if self.comment is not None:
                        comment = self.comment
                        self.comment = None
                    self.enums.append((name, value, comment))
                token = self.token()
                return token
            elif token[0] == "name":
                if name is not None:
                    # flush the previous enumerator
                    if self.comment is not None:
                        comment = self.comment.strip()
                        self.comment = None
                    self.enums.append((name, value, comment))
                name = token[1]
                comment = ""
                token = self.token()
                if token is not None and token[0] == "op" and \
                   token[1][0] == "=":
                    value = ""
                    if len(token[1]) > 1:
                        value = token[1][1:]
                    token = self.token()
                    while token is not None and (
                            token[0] != "sep" or
                            (token[1] != ',' and token[1] != '}')):
                        value = value + token[1]
                        token = self.token()
                else:
                    try:
                        value = "%d" % (int(value) + 1)
                    except ValueError:
                        print("Failed to compute value of enum %s" % (name))
                        value = ""
                if token is not None and token[0] == "sep" and \
                   token[1] == ",":
                    token = self.token()
            else:
                token = self.token()
        return token

    #
    # Parse a C definition block, used for structs it parse till
    # the balancing }
    #
    def parseTypeBlock(self, token):
        while token is not None:
            if token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
            elif token[0] == "sep" and token[1] == "}":
                token = self.token()
                return token
            else:
                token = self.token()
        return token

    def _appendType(self, word):
        """Append a word to self.type, space separated."""
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    #
    # Parse a type: the fact that the type name can either occur after
    # the definition or within the definition makes it a little harder
    # if inside, the name token is pushed back before returning
    #
    def parseType(self, token):
        """Parse a type starting at token; the result is left in
        self.type, self.struct_fields and self.signature. Returns the
        token naming the declared entity when there is one."""
        self.type = ""
        self.struct_fields = []
        self.signature = None
        if token is None:
            return token

        while token is not None and token[0] == "name" and (
                token[1] == "const" or token[1] == "unsigned"):
            self._appendType(token[1])
            token = self.token()
        if token is None:
            return token

        if token[0] == "name" and (token[1] == "long" or
                                   token[1] == "short"):
            self._appendType(token[1])
            # "long int" and "short int": the int is part of the type
            following = self.token()
            if following is not None and following[0] == "name" and \
               following[1] == "int":
                self._appendType(following[1])
            elif following is not None:
                self.lexer.push(following)

        elif token[0] == "name" and token[1] == "struct":
            self._appendType(token[1])
            token = self.token()
            nametok = None
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            if token is not None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseStruct(token)
            elif token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " " + nametok[1] + " *"
                token = self.token()
                while token is not None and token[0] == "op" and \
                      token[1] == "*":
                    self.type = self.type + " *"
                    token = self.token()
                if token is not None and token[0] == "name":
                    nametok = token
                    token = self.token()
                else:
                    self.error("struct : expecting name", token)
                    return token
            elif token is not None and token[0] == "name" and \
                 nametok is not None:
                self.type = self.type + " " + nametok[1]
                return token

            if nametok is not None:
                self.lexer.push(token)
                token = nametok
            return token

        elif token[0] == "name" and token[1] == "enum":
            self._appendType(token[1])
            self.enums = []
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseEnumBlock(token)
            else:
                self.error("parsing enum: expecting '{'", token)
            enum_type = None
            if token is None or token[0] != "name":
                if token is not None:
                    self.lexer.push(token)
                token = ("name", "enum")
            else:
                enum_type = token[1]
            for enum in self.enums:
                self.index.add(enum[0], self.filename,
                               not self.is_header, "enum",
                               (enum[1], enum[2], enum_type))
            return token

        elif token[0] == "name":
            self._appendType(token[1])
        else:
            self.error("parsing type %s: expecting a name" % (self.type),
                       token)
            return token

        token = self.token()
        while token is not None and (
                token[0] == "op" or
                (token[0] == "name" and token[1] == "const")):
            self.type = self.type + " " + token[1]
            token = self.token()

        #
        # if there is a parenthesis here, this means a function type
        #
        if token is not None and token[0] == "sep" and token[1] == '(':
            self.type = self.type + token[1]
            token = self.token()
            while token is not None and token[0] == "op" and \
                  token[1] == '*':
                self.type = self.type + token[1]
                token = self.token()
            if token is None or token[0] != "name":
                self.error("parsing function type, name expected", token)
                return token
            self.type = self.type + token[1]
            nametok = token
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == ')':
                self.type = self.type + token[1]
                token = self.token()
                if token is not None and token[0] == "sep" and \
                   token[1] == '(':
                    token = self.token()
                    # parseSignature() parses types and clobbers self.type
                    saved = self.type
                    token = self.parseSignature(token)
                    self.type = saved
                else:
                    self.error("parsing function type, '(' expected", token)
                    return token
            else:
                self.error("parsing function type, ')' expected", token)
                return token
            self.lexer.push(token)
            token = nametok
            return token

        #
        # do some lookahead for arrays
        #
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + nametok[1]
                while token is not None and token[0] == "sep" and \
                      token[1] == '[':
                    self.type = self.type + token[1]
                    token = self.token()
                    # collect the size expression up to ']' (or ';')
                    while token is not None and not (
                            token[0] == 'sep' and
                            (token[1] == ']' or token[1] == ';')):
                        self.type = self.type + token[1]
                        token = self.token()
                    if token is not None and token[0] == 'sep' and \
                       token[1] == ']':
                        self.type = self.type + token[1]
                        token = self.token()
                    else:
                        self.error("parsing array type, ']' expected", token)
                        return token
            elif token is not None and token[0] == "sep" and \
                 token[1] == ':':
                # remove :12 in case it's a limited int size
                token = self.token()
                token = self.token()
            self.lexer.push(token)
            token = nametok

        return token

    #
    # Parse a signature: '(' has been parsed and we scan the type definition
    # up to the ')' included
    #
    def parseSignature(self, token):
        """Leave a list of (type, name, None) tuples in self.signature."""
        signature = []
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.signature = []
            token = self.token()
            return token
        while token is not None:
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                signature.append((self.type, token[1], None))
                token = self.token()
            elif token is not None and token[0] == "sep" and token[1] == ',':
                token = self.token()
                continue
            elif token is not None and token[0] == "sep" and token[1] == ')':
                # only the type was provided
                if self.type == "...":
                    signature.append((self.type, "...", None))
                else:
                    signature.append((self.type, None, None))
            if token is not None and token[0] == "sep":
                if token[1] == ',':
                    token = self.token()
                    continue
                elif token[1] == ')':
                    token = self.token()
                    break
        self.signature = signature
        return token

    #
    # Parse a global definition, be it a type, variable or function
    # the extern "C" blocks are a bit nasty and require it to recurse.
    #
    def parseGlobal(self, token):
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token is None:
                return token
            if token[0] == 'string':
                if token[1] == 'C':
                    token = self.token()
                    if token is None:
                        return token
                    if token[0] == 'sep' and token[1] == "{":
                        token = self.token()
                        while token is not None and (token[0] != 'sep' or
                                                     token[1] != "}"):
                            if token[0] != 'name':
                                self.error(
                                    "token %s %s unexpected at the top level"
                                    % (token[0], token[1]))
                            token = self.parseGlobal(token)
                        token = self.token()
                        return token
                else:
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token is None or token[0] != 'name':
                return token

        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)

        token = self.parseType(token)
        type_orig = self.type
        if token is None or token[0] != "name":
            return token
        decl = type_orig
        self.name = token[1]
        token = self.token()
        while token is not None and (token[0] == "sep" or token[0] == "op"):
            if token[0] == "sep" and token[1] == "[":
                decl = decl + token[1]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                                             token[1] != ";"):
                    decl = decl + token[1]
                    token = self.token()

            if token is not None and token[0] == "op" and token[1] == "=":
                #
                # Skip the initialization of the variable
                #
                token = self.token()
                if token is not None and token[0] == 'sep' and \
                   token[1] == '{':
                    token = self.token()
                    token = self.parseBlock(token)
                else:
                    self.comment = None
                    while token is not None and (
                            token[0] != "sep" or
                            (token[1] != ';' and token[1] != ',')):
                        token = self.token()
                self.comment = None
                if token is None or token[0] != "sep" or (
                        token[1] != ';' and token[1] != ','):
                    self.error("missing ';' or ',' after value")

            if token is not None and token[0] == "sep":
                if token[1] == ";":
                    self.comment = None
                    token = self.token()
                    if decl == "struct":
                        self.index.add(self.name, self.filename,
                                       not self.is_header, "struct",
                                       self.struct_fields)
                    else:
                        self.index.add(self.name, self.filename,
                                       not self.is_header, "variable", decl)
                    break
                elif token[1] == "(":
                    token = self.token()
                    token = self.parseSignature(token)
                    if token is None:
                        return None
                    if token[0] == "sep" and token[1] == ";":
                        d = self.mergeFunctionComment(
                            self.name, ((decl, None), self.signature), 1)
                        self.index.add(self.name, self.filename, static,
                                       "function", d)
                        token = self.token()
                    elif token[0] == "sep" and token[1] == "{":
                        d = self.mergeFunctionComment(
                            self.name, ((decl, None), self.signature),
                            static)
                        self.index.add(self.name, self.filename, static,
                                       "function", d)
                        token = self.token()
                        token = self.parseBlock(token)
                elif token[1] == ',':
                    self.comment = None
                    self.index.add(self.name, self.filename, static,
                                   "variable", decl)
                    decl = type_orig
                    token = self.token()
                    while token is not None and token[0] == "sep":
                        decl = decl + token[1]
                        token = self.token()
                    if token is not None and token[0] == "name":
                        self.name = token[1]
                        token = self.token()
                else:
                    break
            elif token is not None:
                # an operator other than '=' is not consumed by anything
                # above: hand it back instead of looping on it forever
                break

        return token

    def parse(self):
        """Parse the whole file and return the index holding the result."""
        print("Parsing %s" % (self.filename))
        try:
            token = self.token()
            while token is not None:
                if token[0] != 'name':
                    # error() exits the program
                    self.error("token %s %s unexpected at the top level" % (
                        token[0], token[1]))
                    break
                token = self.parseGlobal(token)
        finally:
            self.input.close()
        return self.index
class docBuilder:
    """A documentation builder

    Collects the .c and .h files of a project, runs CParser over each of
    them, merges the per-file indexes into self.idx and serializes the
    result as an XML API description.
    """

    def __init__(self, name, directories=['.'], excludes=[]):
        """Remember the project name, the directories to scan and the
        substrings that exclude a file (the keys of ignored_files are
        always added to `excludes`)."""
        self.name = name
        # Copy, so the shared default lists and the caller's lists are
        # never aliased by the instance.
        self.directories = list(directories)
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()

    def _attr(self, value):
        """Format value as '%s' would and escape it for an XML attribute."""
        return escape("%s" % (value,))

    def _sorted_keys(self, mapping):
        """Return the keys of mapping as a sorted list."""
        keys = list(mapping.keys())
        keys.sort()
        return keys

    def _is_excluded(self, path):
        """Tell whether any exclusion string occurs anywhere in path."""
        for excl in self.excludes:
            if path.find(excl) != -1:
                return 1
        return 0

    def analyze(self):
        """Print the header/module counts, then analyze the merged index."""
        print("Project %s : %d headers, %d modules" % (
            self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge it with merge_public()."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            # idx.analyze()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def scan(self):
        """Register the *.c and *.h files of each directory that are not
        excluded, then parse headers followed by modules."""
        for directory in self.directories:
            for path in glob.glob(directory + "/*.c"):
                if not self._is_excluded(path):
                    self.modules[path] = None
            for path in glob.glob(directory + "/*.h"):
                if not self._is_excluded(path):
                    self.headers[path] = None
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of file, without a trailing '.h'."""
        module = os.path.basename(file)
        if module[-2:] == '.h':
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element for the enum value `name`."""
        ident = self.idx.enums[name]
        output.write(" <enum name='%s' file='%s'" % (
            name, self.modulename_file(ident.module)))
        if ident.info is not None:
            info = ident.info
            # value and type come from C source text (e.g. shift
            # expressions), so they are escaped like the description.
            if info[0] is not None and info[0] != '':
                output.write(" value='%s'" % self._attr(info[0]))
            if info[2] is not None and info[2] != '':
                output.write(" type='%s'" % self._attr(info[2]))
            if info[1] is not None and info[1] != '':
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for `name`, with its description and
        arguments when id.info unpacks as (args, desc)."""
        ident = self.idx.macros[name]
        output.write(" <macro name='%s' file='%s'>\n" % (
            name, self.modulename_file(ident.module)))
        if ident.info is not None:
            try:
                (args, desc) = ident.info
                if desc is not None and desc != "":
                    output.write(" <info>%s</info>\n" % (escape(desc)))
                for arg in args:
                    (arg_name, arg_desc) = arg
                    if arg_desc is not None and arg_desc != "":
                        output.write(" <arg name='%s' info='%s'/>\n" % (
                            arg_name, escape(arg_desc)))
                    else:
                        output.write(" <arg name='%s'/>\n" % (arg_name))
            except Exception:
                # Still best effort, but reported like the other
                # serializers instead of being swallowed silently.
                print("Failed to serialize macro %s" % (name))
        output.write(" </macro>\n")

    def serialize_typedef(self, output, name):
        """Write a <struct> element (with fields when the struct is known
        to the index) for 'struct ...' typedefs, a <typedef> otherwise."""
        ident = self.idx.typedefs[name]
        if ident.info[0:7] == 'struct ':
            output.write(" <struct name='%s' file='%s' type='%s'" % (
                name, self.modulename_file(ident.module),
                self._attr(ident.info)))
            # From here on `name` is the struct tag, not the typedef name.
            name = ident.info[7:]
            if name in self.idx.structs and isinstance(
                    self.idx.structs[name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[name].info:
                        desc = field[2]
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write(" <field name='%s' type='%s' info='%s'/>\n" % (
                            self._attr(field[1]), self._attr(field[0]), desc))
                except Exception:
                    print("Failed to serialize struct %s" % (name))
                output.write(" </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(ident.module),
                self._attr(ident.info)))

    def serialize_variable(self, output, name):
        """Write the <variable> element for `name`; the type attribute is
        only emitted when id.info is set."""
        ident = self.idx.variables[name]
        if ident.info is not None:
            # The recorded type is raw C text, so it has to be escaped.
            output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
                name, self.modulename_file(ident.module),
                self._attr(ident.info)))
        else:
            output.write(" <variable name='%s' file='%s'/>\n" % (
                name, self.modulename_file(ident.module)))

    def serialize_function(self, output, name):
        """Write the element for function `name` (the tag is id.type) with
        its description, return value and arguments."""
        ident = self.idx.functions[name]
        output.write(" <%s name='%s' file='%s'>\n" % (
            ident.type, name, self.modulename_file(ident.module)))
        try:
            (ret, params, desc) = ident.info
            output.write(" <info>%s</info>\n" % (escape(desc)))
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write(" <return type='void'/>\n")
                else:
                    output.write(" <return type='%s' info='%s'/>\n" % (
                        self._attr(ret[0]), escape(ret[1])))
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write(" <arg name='%s' type='%s' info=''/>\n" % (
                        param[1], self._attr(param[0])))
                else:
                    output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (
                        param[1], self._attr(param[0]), escape(param[2])))
        except Exception:
            # Same text the two-item print statement produced.
            print("Failed to save function %s info: " % name
                  + " " + repr(ident.info))
        output.write(" </%s>\n" % (ident.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing, sorted, every symbol found in
        the header `file`."""
        module = self.modulename_file(file)
        output.write(" <file name='%s'>\n" % (module))
        exported = self.headers[file]
        ids = list(exported.functions.keys()) + \
              list(exported.variables.keys()) + \
              list(exported.macros.keys()) + \
              list(exported.typedefs.keys()) + \
              list(exported.structs.keys()) + \
              list(exported.enums.keys())
        ids.sort()
        for symbol in ids:
            output.write(" <exports symbol='%s'/>\n" % (symbol))
        output.write(" </file>\n")

    def serialize(self, filename = None):
        """Write the XML API description to filename (default
        '<name>-api.xml').  The file is closed even when a serializer
        raises."""
        if filename is None:
            filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        output = open(filename, "w")
        try:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write(" <files>\n")
            for header in list(self.headers.keys()):
                self.serialize_exports(output, header)
            output.write(" </files>\n")
            output.write(" <symbols>\n")
            for macro in self._sorted_keys(self.idx.macros):
                self.serialize_macro(output, macro)
            for enum in self._sorted_keys(self.idx.enums):
                self.serialize_enum(output, enum)
            for typedef in self._sorted_keys(self.idx.typedefs):
                self.serialize_typedef(output, typedef)
            for variable in self._sorted_keys(self.idx.variables):
                self.serialize_variable(output, variable)
            for function in self._sorted_keys(self.idx.functions):
                self.serialize_function(output, function)
            output.write(" </symbols>\n")
            output.write("</api>\n")
        finally:
            output.close()


def rebuild():
    """Guess the project from files relative to the current directory,
    then scan, analyze and serialize it (plus libexslt when present).

    Returns the main docBuilder, or None when no project is recognised.
    """
    builder = None
    if glob.glob("../parser.c") != []:
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c") != []:
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c") != []:
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder

#
# for debugging the parser
#
def parse(filename):
    """Run CParser over a single file and return its index."""
    parser = CParser(filename)
    idx = parser.parse()
    return idx

if __name__ == "__main__":
    rebuild()