#!/usr/bin/env python
#
# Copyright 2007 Neal Norwitz
# Portions Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate an Abstract Syntax Tree (AST) for C++."""

__author__ = 'nnorwitz@google.com (Neal Norwitz)'


# TODO:
#  * Tokens should never be exported, need to convert to Nodes
#    (return types, parameters, etc.)
#  * Handle static class data for templatized classes
#  * Handle casts (both C++ and C-style)
#  * Handle conditions and loops (if/else, switch, for, while/do)
#
# TODO much, much later:
#  * Handle #define
#  * exceptions


try:
    # Python 3.x
    import builtins
except ImportError:
    # Python 2.x
    import __builtin__ as builtins

import sys
import traceback

from cpp import keywords
from cpp import tokenize
from cpp import utils


if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    def reversed(seq):
        for i in range(len(seq)-1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    def next(obj):
        return obj.next()


VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags describing properties of a parsed function/method.
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80

"""
These are currently unused.  Should really handle these properly at some point.

TYPE_MODIFIER_INLINE   = 0x010000
TYPE_MODIFIER_EXTERN   = 0x020000
TYPE_MODIFIER_STATIC   = 0x040000
TYPE_MODIFIER_CONST    = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE  = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# Sentinel token type/name used to signal namespace exit in the token stream.
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'


# TODO(nnorwitz): use this as a singleton for templated_types, etc
# where we don't want to create a new empty dict each time.  It is also const.
class _NullDict(object):
    """Read-only stand-in for an empty dict (always empty, nothing stored)."""

    def __contains__(self, key):
        # Bug fix: this was `lambda self: False`, which takes no key argument,
        # so any `x in _NullDict()` raised TypeError rather than returning
        # False.  __contains__ must accept the candidate item.
        return False

    def keys(self):
        return ()

    # All iteration-style accessors yield nothing, matching an empty dict
    # under both Python 2 (iterkeys/itervalues/iteritems) and Python 3 names.
    values = items = iterkeys = itervalues = iteritems = keys


# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node."""

    def __init__(self, start, end):
        # start/end are offsets into the original source (token positions).
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    # NOTE: the XXX prefix deliberately disables this as __str__; subclasses
    # provide their own __str__ implementations instead.
    def XXX__str__(self):
        return self._StringHelper(self.__class__.__name__, '')

    def _StringHelper(self, name, suffix):
        # With utils.DEBUG set, include the start/end positions in the repr.
        if not utils.DEBUG:
            return '%s(%s)' % (name, suffix)
        return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)

    def __repr__(self):
        return str(self)


class Define(Node):
    """A #define directive: name and its (possibly empty) definition text."""

    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name
        self.definition = definition

    def __str__(self):
        value = '%s %s' % (self.name, self.definition)
        return self._StringHelper(self.__class__.__name__, value)


class Include(Node):
    """An #include directive; system is True for <...> style includes."""

    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename
        self.system = system

    def __str__(self):
        # Render with the same bracket style as the original directive.
        fmt = '"%s"'
        if self.system:
            fmt = '<%s>'
        return self._StringHelper(self.__class__.__name__, fmt % self.filename)


class Goto(Node):
    """A goto statement referencing a label."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.label))


class Expr(Node):
    """A generic expression, stored as its raw token sequence."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.expr))


class Return(Expr):
    """A return statement (expression subclass)."""
    pass


class Delete(Expr):
    """A delete expression (expression subclass)."""
    pass


class Friend(Expr):
    """A friend declaration; records the namespace it appeared in."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Copy so later namespace-stack mutation doesn't alias into this node.
        self.namespace = namespace[:]


class Using(Node):
    """A using declaration/directive, stored as its name tokens."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.names))


class Parameter(Node):
    """A single function parameter: name, Type, and default-value tokens."""

    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def __str__(self):
        name = str(self.type)
        suffix = '%s %s' % (name, self.name)
        if self.default:
            suffix += ' = ' + ''.join([d.name for d in self.default])
        return self._StringHelper(self.__class__.__name__, suffix)


class _GenericDeclaration(Node):
    """Base for named declarations that live inside a namespace stack."""

    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        # Copy so later namespace-stack mutation doesn't alias into this node.
        self.namespace = namespace[:]

    def FullName(self):
        """Return the namespace-qualified name, e.g. 'ns::Name'."""
        prefix = ''
        if self.namespace and self.namespace[-1]:
            prefix = '::'.join(self.namespace) + '::'
        return prefix + self.name

    def _TypeStringHelper(self, suffix):
        # Anonymous namespaces appear as empty strings in the stack.
        if self.namespace:
            names = [n or '<anonymous>' for n in self.namespace]
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)


# TODO(nnorwitz): merge with Parameter in some way?
251class VariableDeclaration(_GenericDeclaration): 252 def __init__(self, start, end, name, var_type, initial_value, namespace): 253 _GenericDeclaration.__init__(self, start, end, name, namespace) 254 self.type = var_type 255 self.initial_value = initial_value 256 257 def Requires(self, node): 258 # TODO(nnorwitz): handle namespaces, etc. 259 return self.type.name == node.name 260 261 def ToString(self): 262 """Return a string that tries to reconstitute the variable decl.""" 263 suffix = '%s %s' % (self.type, self.name) 264 if self.initial_value: 265 suffix += ' = ' + self.initial_value 266 return suffix 267 268 def __str__(self): 269 return self._StringHelper(self.__class__.__name__, self.ToString()) 270 271 272class Typedef(_GenericDeclaration): 273 def __init__(self, start, end, name, alias, namespace): 274 _GenericDeclaration.__init__(self, start, end, name, namespace) 275 self.alias = alias 276 277 def IsDefinition(self): 278 return True 279 280 def IsExportable(self): 281 return True 282 283 def Requires(self, node): 284 # TODO(nnorwitz): handle namespaces, etc. 
285 name = node.name 286 for token in self.alias: 287 if token is not None and name == token.name: 288 return True 289 return False 290 291 def __str__(self): 292 suffix = '%s, %s' % (self.name, self.alias) 293 return self._TypeStringHelper(suffix) 294 295 296class _NestedType(_GenericDeclaration): 297 def __init__(self, start, end, name, fields, namespace): 298 _GenericDeclaration.__init__(self, start, end, name, namespace) 299 self.fields = fields 300 301 def IsDefinition(self): 302 return True 303 304 def IsExportable(self): 305 return True 306 307 def __str__(self): 308 suffix = '%s, {%s}' % (self.name, self.fields) 309 return self._TypeStringHelper(suffix) 310 311 312class Union(_NestedType): 313 pass 314 315 316class Enum(_NestedType): 317 pass 318 319 320class Class(_GenericDeclaration): 321 def __init__(self, start, end, name, bases, templated_types, body, namespace): 322 _GenericDeclaration.__init__(self, start, end, name, namespace) 323 self.bases = bases 324 self.body = body 325 self.templated_types = templated_types 326 327 def IsDeclaration(self): 328 return self.bases is None and self.body is None 329 330 def IsDefinition(self): 331 return not self.IsDeclaration() 332 333 def IsExportable(self): 334 return not self.IsDeclaration() 335 336 def Requires(self, node): 337 # TODO(nnorwitz): handle namespaces, etc. 338 if self.bases: 339 for token_list in self.bases: 340 # TODO(nnorwitz): bases are tokens, do name comparision. 341 for token in token_list: 342 if token.name == node.name: 343 return True 344 # TODO(nnorwitz): search in body too. 
345 return False 346 347 def __str__(self): 348 name = self.name 349 if self.templated_types: 350 name += '<%s>' % self.templated_types 351 suffix = '%s, %s, %s' % (name, self.bases, self.body) 352 return self._TypeStringHelper(suffix) 353 354 355class Struct(Class): 356 pass 357 358 359class Function(_GenericDeclaration): 360 def __init__(self, start, end, name, return_type, parameters, 361 modifiers, templated_types, body, namespace): 362 _GenericDeclaration.__init__(self, start, end, name, namespace) 363 converter = TypeConverter(namespace) 364 self.return_type = converter.CreateReturnType(return_type) 365 self.parameters = converter.ToParameters(parameters) 366 self.modifiers = modifiers 367 self.body = body 368 self.templated_types = templated_types 369 370 def IsDeclaration(self): 371 return self.body is None 372 373 def IsDefinition(self): 374 return self.body is not None 375 376 def IsExportable(self): 377 if self.return_type and 'static' in self.return_type.modifiers: 378 return False 379 return None not in self.namespace 380 381 def Requires(self, node): 382 if self.parameters: 383 # TODO(nnorwitz): parameters are tokens, do name comparision. 384 for p in self.parameters: 385 if p.name == node.name: 386 return True 387 # TODO(nnorwitz): search in body too. 388 return False 389 390 def __str__(self): 391 # TODO(nnorwitz): add templated_types. 392 suffix = ('%s %s(%s), 0x%02x, %s' % 393 (self.return_type, self.name, self.parameters, 394 self.modifiers, self.body)) 395 return self._TypeStringHelper(suffix) 396 397 398class Method(Function): 399 def __init__(self, start, end, name, in_class, return_type, parameters, 400 modifiers, templated_types, body, namespace): 401 Function.__init__(self, start, end, name, return_type, parameters, 402 modifiers, templated_types, body, namespace) 403 # TODO(nnorwitz): in_class could also be a namespace which can 404 # mess up finding functions properly. 
405 self.in_class = in_class 406 407 408class Type(_GenericDeclaration): 409 """Type used for any variable (eg class, primitive, struct, etc).""" 410 411 def __init__(self, start, end, name, templated_types, modifiers, 412 reference, pointer, array): 413 """ 414 Args: 415 name: str name of main type 416 templated_types: [Class (Type?)] template type info between <> 417 modifiers: [str] type modifiers (keywords) eg, const, mutable, etc. 418 reference, pointer, array: bools 419 """ 420 _GenericDeclaration.__init__(self, start, end, name, []) 421 self.templated_types = templated_types 422 if not name and modifiers: 423 self.name = modifiers.pop() 424 self.modifiers = modifiers 425 self.reference = reference 426 self.pointer = pointer 427 self.array = array 428 429 def __str__(self): 430 prefix = '' 431 if self.modifiers: 432 prefix = ' '.join(self.modifiers) + ' ' 433 name = str(self.name) 434 if self.templated_types: 435 name += '<%s>' % self.templated_types 436 suffix = prefix + name 437 if self.reference: 438 suffix += '&' 439 if self.pointer: 440 suffix += '*' 441 if self.array: 442 suffix += '[]' 443 return self._TypeStringHelper(suffix) 444 445 # By definition, Is* are always False. A Type can only exist in 446 # some sort of variable declaration, parameter, or return value. 447 def IsDeclaration(self): 448 return False 449 450 def IsDefinition(self): 451 return False 452 453 def IsExportable(self): 454 return False 455 456 457class TypeConverter(object): 458 459 def __init__(self, namespace_stack): 460 self.namespace_stack = namespace_stack 461 462 def _GetTemplateEnd(self, tokens, start): 463 count = 1 464 end = start 465 while 1: 466 token = tokens[end] 467 end += 1 468 if token.name == '<': 469 count += 1 470 elif token.name == '>': 471 count -= 1 472 if count == 0: 473 break 474 return tokens[start:end-1], end 475 476 def ToType(self, tokens): 477 """Convert [Token,...] to [Class(...), ] useful for base classes. 
478 For example, code like class Foo : public Bar<x, y> { ... }; 479 the "Bar<x, y>" portion gets converted to an AST. 480 481 Returns: 482 [Class(...), ...] 483 """ 484 result = [] 485 name_tokens = [] 486 reference = pointer = array = False 487 488 def AddType(templated_types): 489 # Partition tokens into name and modifier tokens. 490 names = [] 491 modifiers = [] 492 for t in name_tokens: 493 if keywords.IsKeyword(t.name): 494 modifiers.append(t.name) 495 else: 496 names.append(t.name) 497 name = ''.join(names) 498 result.append(Type(name_tokens[0].start, name_tokens[-1].end, 499 name, templated_types, modifiers, 500 reference, pointer, array)) 501 del name_tokens[:] 502 503 i = 0 504 end = len(tokens) 505 while i < end: 506 token = tokens[i] 507 if token.name == '<': 508 new_tokens, new_end = self._GetTemplateEnd(tokens, i+1) 509 AddType(self.ToType(new_tokens)) 510 # If there is a comma after the template, we need to consume 511 # that here otherwise it becomes part of the name. 512 i = new_end 513 reference = pointer = array = False 514 elif token.name == ',': 515 AddType([]) 516 reference = pointer = array = False 517 elif token.name == '*': 518 pointer = True 519 elif token.name == '&': 520 reference = True 521 elif token.name == '[': 522 pointer = True 523 elif token.name == ']': 524 pass 525 else: 526 name_tokens.append(token) 527 i += 1 528 529 if name_tokens: 530 # No '<' in the tokens, just a simple name and no template. 531 AddType([]) 532 return result 533 534 def DeclarationToParts(self, parts, needs_name_removed): 535 name = None 536 default = [] 537 if needs_name_removed: 538 # Handle default (initial) values properly. 
539 for i, t in enumerate(parts): 540 if t.name == '=': 541 default = parts[i+1:] 542 name = parts[i-1].name 543 if name == ']' and parts[i-2].name == '[': 544 name = parts[i-3].name 545 i -= 1 546 parts = parts[:i-1] 547 break 548 else: 549 if parts[-1].token_type == tokenize.NAME: 550 name = parts.pop().name 551 else: 552 # TODO(nnorwitz): this is a hack that happens for code like 553 # Register(Foo<T>); where it thinks this is a function call 554 # but it's actually a declaration. 555 name = '???' 556 modifiers = [] 557 type_name = [] 558 other_tokens = [] 559 templated_types = [] 560 i = 0 561 end = len(parts) 562 while i < end: 563 p = parts[i] 564 if keywords.IsKeyword(p.name): 565 modifiers.append(p.name) 566 elif p.name == '<': 567 templated_tokens, new_end = self._GetTemplateEnd(parts, i+1) 568 templated_types = self.ToType(templated_tokens) 569 i = new_end - 1 570 # Don't add a spurious :: to data members being initialized. 571 next_index = i + 1 572 if next_index < end and parts[next_index].name == '::': 573 i += 1 574 elif p.name in ('[', ']', '='): 575 # These are handled elsewhere. 576 other_tokens.append(p) 577 elif p.name not in ('*', '&', '>'): 578 # Ensure that names have a space between them. 579 if (type_name and type_name[-1].token_type == tokenize.NAME and 580 p.token_type == tokenize.NAME): 581 type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0)) 582 type_name.append(p) 583 else: 584 other_tokens.append(p) 585 i += 1 586 type_name = ''.join([t.name for t in type_name]) 587 return name, type_name, templated_types, modifiers, default, other_tokens 588 589 def ToParameters(self, tokens): 590 if not tokens: 591 return [] 592 593 result = [] 594 name = type_name = '' 595 type_modifiers = [] 596 pointer = reference = array = False 597 first_token = None 598 default = [] 599 600 def AddParameter(): 601 if default: 602 del default[0] # Remove flag. 
603 end = type_modifiers[-1].end 604 parts = self.DeclarationToParts(type_modifiers, True) 605 (name, type_name, templated_types, modifiers, 606 unused_default, unused_other_tokens) = parts 607 parameter_type = Type(first_token.start, first_token.end, 608 type_name, templated_types, modifiers, 609 reference, pointer, array) 610 p = Parameter(first_token.start, end, name, 611 parameter_type, default) 612 result.append(p) 613 614 template_count = 0 615 for s in tokens: 616 if not first_token: 617 first_token = s 618 if s.name == '<': 619 template_count += 1 620 elif s.name == '>': 621 template_count -= 1 622 if template_count > 0: 623 type_modifiers.append(s) 624 continue 625 626 if s.name == ',': 627 AddParameter() 628 name = type_name = '' 629 type_modifiers = [] 630 pointer = reference = array = False 631 first_token = None 632 default = [] 633 elif s.name == '*': 634 pointer = True 635 elif s.name == '&': 636 reference = True 637 elif s.name == '[': 638 array = True 639 elif s.name == ']': 640 pass # Just don't add to type_modifiers. 641 elif s.name == '=': 642 # Got a default value. Add any value (None) as a flag. 643 default.append(None) 644 elif default: 645 default.append(s) 646 else: 647 type_modifiers.append(s) 648 AddParameter() 649 return result 650 651 def CreateReturnType(self, return_type_seq): 652 if not return_type_seq: 653 return None 654 start = return_type_seq[0].start 655 end = return_type_seq[-1].end 656 _, name, templated_types, modifiers, default, other_tokens = \ 657 self.DeclarationToParts(return_type_seq, False) 658 names = [n.name for n in other_tokens] 659 reference = '&' in names 660 pointer = '*' in names 661 array = '[' in names 662 return Type(start, end, name, templated_types, modifiers, 663 reference, pointer, array) 664 665 def GetTemplateIndices(self, names): 666 # names is a list of strings. 
667 start = names.index('<') 668 end = len(names) - 1 669 while end > 0: 670 if names[end] == '>': 671 break 672 end -= 1 673 return start, end+1 674 675class AstBuilder(object): 676 def __init__(self, token_stream, filename, in_class='', visibility=None, 677 namespace_stack=[]): 678 self.tokens = token_stream 679 self.filename = filename 680 # TODO(nnorwitz): use a better data structure (deque) for the queue. 681 # Switching directions of the "queue" improved perf by about 25%. 682 # Using a deque should be even better since we access from both sides. 683 self.token_queue = [] 684 self.namespace_stack = namespace_stack[:] 685 self.in_class = in_class 686 if in_class is None: 687 self.in_class_name_only = None 688 else: 689 self.in_class_name_only = in_class.split('::')[-1] 690 self.visibility = visibility 691 self.in_function = False 692 self.current_token = None 693 # Keep the state whether we are currently handling a typedef or not. 694 self._handling_typedef = False 695 696 self.converter = TypeConverter(self.namespace_stack) 697 698 def HandleError(self, msg, token): 699 printable_queue = list(reversed(self.token_queue[-20:])) 700 sys.stderr.write('Got %s in %s @ %s %s\n' % 701 (msg, self.filename, token, printable_queue)) 702 703 def Generate(self): 704 while 1: 705 token = self._GetNextToken() 706 if not token: 707 break 708 709 # Get the next token. 710 self.current_token = token 711 712 # Dispatch on the next token type. 
713 if token.token_type == _INTERNAL_TOKEN: 714 if token.name == _NAMESPACE_POP: 715 self.namespace_stack.pop() 716 continue 717 718 try: 719 result = self._GenerateOne(token) 720 if result is not None: 721 yield result 722 except: 723 self.HandleError('exception', token) 724 raise 725 726 def _CreateVariable(self, pos_token, name, type_name, type_modifiers, 727 ref_pointer_name_seq, templated_types, value=None): 728 reference = '&' in ref_pointer_name_seq 729 pointer = '*' in ref_pointer_name_seq 730 array = '[' in ref_pointer_name_seq 731 var_type = Type(pos_token.start, pos_token.end, type_name, 732 templated_types, type_modifiers, 733 reference, pointer, array) 734 return VariableDeclaration(pos_token.start, pos_token.end, 735 name, var_type, value, self.namespace_stack) 736 737 def _GenerateOne(self, token): 738 if token.token_type == tokenize.NAME: 739 if (keywords.IsKeyword(token.name) and 740 not keywords.IsBuiltinType(token.name)): 741 method = getattr(self, 'handle_' + token.name) 742 return method() 743 elif token.name == self.in_class_name_only: 744 # The token name is the same as the class, must be a ctor if 745 # there is a paren. Otherwise, it's the return type. 746 # Peek ahead to get the next token to figure out which. 747 next = self._GetNextToken() 748 self._AddBackToken(next) 749 if next.token_type == tokenize.SYNTAX and next.name == '(': 750 return self._GetMethod([token], FUNCTION_CTOR, None, True) 751 # Fall through--handle like any other method. 752 753 # Handle data or function declaration/definition. 754 syntax = tokenize.SYNTAX 755 temp_tokens, last_token = \ 756 self._GetVarTokensUpTo(syntax, '(', ';', '{', '[') 757 temp_tokens.insert(0, token) 758 if last_token.name == '(': 759 # If there is an assignment before the paren, 760 # this is an expression, not a method. 
761 expr = bool([e for e in temp_tokens if e.name == '=']) 762 if expr: 763 new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';') 764 temp_tokens.append(last_token) 765 temp_tokens.extend(new_temp) 766 last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0) 767 768 if last_token.name == '[': 769 # Handle array, this isn't a method, unless it's an operator. 770 # TODO(nnorwitz): keep the size somewhere. 771 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']') 772 temp_tokens.append(last_token) 773 if temp_tokens[-2].name == 'operator': 774 temp_tokens.append(self._GetNextToken()) 775 else: 776 temp_tokens2, last_token = \ 777 self._GetVarTokensUpTo(tokenize.SYNTAX, ';') 778 temp_tokens.extend(temp_tokens2) 779 780 if last_token.name == ';': 781 # Handle data, this isn't a method. 782 parts = self.converter.DeclarationToParts(temp_tokens, True) 783 (name, type_name, templated_types, modifiers, default, 784 unused_other_tokens) = parts 785 786 t0 = temp_tokens[0] 787 names = [t.name for t in temp_tokens] 788 if templated_types: 789 start, end = self.converter.GetTemplateIndices(names) 790 names = names[:start] + names[end:] 791 default = ''.join([t.name for t in default]) 792 return self._CreateVariable(t0, name, type_name, modifiers, 793 names, templated_types, default) 794 if last_token.name == '{': 795 self._AddBackTokens(temp_tokens[1:]) 796 self._AddBackToken(last_token) 797 method_name = temp_tokens[0].name 798 method = getattr(self, 'handle_' + method_name, None) 799 if not method: 800 # Must be declaring a variable. 801 # TODO(nnorwitz): handle the declaration. 802 return None 803 return method() 804 return self._GetMethod(temp_tokens, 0, None, False) 805 elif token.token_type == tokenize.SYNTAX: 806 if token.name == '~' and self.in_class: 807 # Must be a dtor (probably not in method body). 808 token = self._GetNextToken() 809 # self.in_class can contain A::Name, but the dtor will only 810 # be Name. Make sure to compare against the right value. 
811 if (token.token_type == tokenize.NAME and 812 token.name == self.in_class_name_only): 813 return self._GetMethod([token], FUNCTION_DTOR, None, True) 814 # TODO(nnorwitz): handle a lot more syntax. 815 elif token.token_type == tokenize.PREPROCESSOR: 816 # TODO(nnorwitz): handle more preprocessor directives. 817 # token starts with a #, so remove it and strip whitespace. 818 name = token.name[1:].lstrip() 819 if name.startswith('include'): 820 # Remove "include". 821 name = name[7:].strip() 822 assert name 823 # Handle #include \<newline> "header-on-second-line.h". 824 if name.startswith('\\'): 825 name = name[1:].strip() 826 assert name[0] in '<"', token 827 assert name[-1] in '>"', token 828 system = name[0] == '<' 829 filename = name[1:-1] 830 return Include(token.start, token.end, filename, system) 831 if name.startswith('define'): 832 # Remove "define". 833 name = name[6:].strip() 834 assert name 835 value = '' 836 for i, c in enumerate(name): 837 if c.isspace(): 838 value = name[i:].lstrip() 839 name = name[:i] 840 break 841 return Define(token.start, token.end, name, value) 842 if name.startswith('if') and name[2:3].isspace(): 843 condition = name[3:].strip() 844 if condition.startswith('0') or condition.startswith('(0)'): 845 self._SkipIf0Blocks() 846 return None 847 848 def _GetTokensUpTo(self, expected_token_type, expected_token): 849 return self._GetVarTokensUpTo(expected_token_type, expected_token)[0] 850 851 def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens): 852 last_token = self._GetNextToken() 853 tokens = [] 854 while (last_token.token_type != expected_token_type or 855 last_token.name not in expected_tokens): 856 tokens.append(last_token) 857 last_token = self._GetNextToken() 858 return tokens, last_token 859 860 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary. 
861 def _IgnoreUpTo(self, token_type, token): 862 unused_tokens = self._GetTokensUpTo(token_type, token) 863 864 def _SkipIf0Blocks(self): 865 count = 1 866 while 1: 867 token = self._GetNextToken() 868 if token.token_type != tokenize.PREPROCESSOR: 869 continue 870 871 name = token.name[1:].lstrip() 872 if name.startswith('endif'): 873 count -= 1 874 if count == 0: 875 break 876 elif name.startswith('if'): 877 count += 1 878 879 def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None): 880 if GetNextToken is None: 881 GetNextToken = self._GetNextToken 882 # Assumes the current token is open_paren and we will consume 883 # and return up to the close_paren. 884 count = 1 885 token = GetNextToken() 886 while 1: 887 if token.token_type == tokenize.SYNTAX: 888 if token.name == open_paren: 889 count += 1 890 elif token.name == close_paren: 891 count -= 1 892 if count == 0: 893 break 894 yield token 895 token = GetNextToken() 896 yield token 897 898 def _GetParameters(self): 899 return self._GetMatchingChar('(', ')') 900 901 def GetScope(self): 902 return self._GetMatchingChar('{', '}') 903 904 def _GetNextToken(self): 905 if self.token_queue: 906 return self.token_queue.pop() 907 return next(self.tokens) 908 909 def _AddBackToken(self, token): 910 if token.whence == tokenize.WHENCE_STREAM: 911 token.whence = tokenize.WHENCE_QUEUE 912 self.token_queue.insert(0, token) 913 else: 914 assert token.whence == tokenize.WHENCE_QUEUE, token 915 self.token_queue.append(token) 916 917 def _AddBackTokens(self, tokens): 918 if tokens: 919 if tokens[-1].whence == tokenize.WHENCE_STREAM: 920 for token in tokens: 921 token.whence = tokenize.WHENCE_QUEUE 922 self.token_queue[:0] = reversed(tokens) 923 else: 924 assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens 925 self.token_queue.extend(reversed(tokens)) 926 927 def GetName(self, seq=None): 928 """Returns ([tokens], next_token_info).""" 929 GetNextToken = self._GetNextToken 930 if seq is not None: 931 it = 
iter(seq) 932 GetNextToken = lambda: next(it) 933 next_token = GetNextToken() 934 tokens = [] 935 last_token_was_name = False 936 while (next_token.token_type == tokenize.NAME or 937 (next_token.token_type == tokenize.SYNTAX and 938 next_token.name in ('::', '<'))): 939 # Two NAMEs in a row means the identifier should terminate. 940 # It's probably some sort of variable declaration. 941 if last_token_was_name and next_token.token_type == tokenize.NAME: 942 break 943 last_token_was_name = next_token.token_type == tokenize.NAME 944 tokens.append(next_token) 945 # Handle templated names. 946 if next_token.name == '<': 947 tokens.extend(self._GetMatchingChar('<', '>', GetNextToken)) 948 last_token_was_name = True 949 next_token = GetNextToken() 950 return tokens, next_token 951 952 def GetMethod(self, modifiers, templated_types): 953 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') 954 assert len(return_type_and_name) >= 1 955 return self._GetMethod(return_type_and_name, modifiers, templated_types, 956 False) 957 958 def _GetMethod(self, return_type_and_name, modifiers, templated_types, 959 get_paren): 960 template_portion = None 961 if get_paren: 962 token = self._GetNextToken() 963 assert token.token_type == tokenize.SYNTAX, token 964 if token.name == '<': 965 # Handle templatized dtors. 966 template_portion = [token] 967 template_portion.extend(self._GetMatchingChar('<', '>')) 968 token = self._GetNextToken() 969 assert token.token_type == tokenize.SYNTAX, token 970 assert token.name == '(', token 971 972 name = return_type_and_name.pop() 973 # Handle templatized ctors. 
974 if name.name == '>': 975 index = 1 976 while return_type_and_name[index].name != '<': 977 index += 1 978 template_portion = return_type_and_name[index:] + [name] 979 del return_type_and_name[index:] 980 name = return_type_and_name.pop() 981 elif name.name == ']': 982 rt = return_type_and_name 983 assert rt[-1].name == '[', return_type_and_name 984 assert rt[-2].name == 'operator', return_type_and_name 985 name_seq = return_type_and_name[-2:] 986 del return_type_and_name[-2:] 987 name = tokenize.Token(tokenize.NAME, 'operator[]', 988 name_seq[0].start, name.end) 989 # Get the open paren so _GetParameters() below works. 990 unused_open_paren = self._GetNextToken() 991 992 # TODO(nnorwitz): store template_portion. 993 return_type = return_type_and_name 994 indices = name 995 if return_type: 996 indices = return_type[0] 997 998 # Force ctor for templatized ctors. 999 if name.name == self.in_class and not modifiers: 1000 modifiers |= FUNCTION_CTOR 1001 parameters = list(self._GetParameters()) 1002 del parameters[-1] # Remove trailing ')'. 1003 1004 # Handling operator() is especially weird. 1005 if name.name == 'operator' and not parameters: 1006 token = self._GetNextToken() 1007 assert token.name == '(', token 1008 parameters = list(self._GetParameters()) 1009 del parameters[-1] # Remove trailing ')'. 1010 1011 token = self._GetNextToken() 1012 while token.token_type == tokenize.NAME: 1013 modifier_token = token 1014 token = self._GetNextToken() 1015 if modifier_token.name == 'const': 1016 modifiers |= FUNCTION_CONST 1017 elif modifier_token.name == '__attribute__': 1018 # TODO(nnorwitz): handle more __attribute__ details. 1019 modifiers |= FUNCTION_ATTRIBUTE 1020 assert token.name == '(', token 1021 # Consume everything between the (parens). 
1022 unused_tokens = list(self._GetMatchingChar('(', ')')) 1023 token = self._GetNextToken() 1024 elif modifier_token.name == 'throw': 1025 modifiers |= FUNCTION_THROW 1026 assert token.name == '(', token 1027 # Consume everything between the (parens). 1028 unused_tokens = list(self._GetMatchingChar('(', ')')) 1029 token = self._GetNextToken() 1030 elif modifier_token.name == modifier_token.name.upper(): 1031 # HACK(nnorwitz): assume that all upper-case names 1032 # are some macro we aren't expanding. 1033 modifiers |= FUNCTION_UNKNOWN_ANNOTATION 1034 else: 1035 self.HandleError('unexpected token', modifier_token) 1036 1037 assert token.token_type == tokenize.SYNTAX, token 1038 # Handle ctor initializers. 1039 if token.name == ':': 1040 # TODO(nnorwitz): anything else to handle for initializer list? 1041 while token.name != ';' and token.name != '{': 1042 token = self._GetNextToken() 1043 1044 # Handle pointer to functions that are really data but look 1045 # like method declarations. 1046 if token.name == '(': 1047 if parameters[0].name == '*': 1048 # name contains the return type. 1049 name = parameters.pop() 1050 # parameters contains the name of the data. 1051 modifiers = [p.name for p in parameters] 1052 # Already at the ( to open the parameter list. 1053 function_parameters = list(self._GetMatchingChar('(', ')')) 1054 del function_parameters[-1] # Remove trailing ')'. 1055 # TODO(nnorwitz): store the function_parameters. 1056 token = self._GetNextToken() 1057 assert token.token_type == tokenize.SYNTAX, token 1058 assert token.name == ';', token 1059 return self._CreateVariable(indices, name.name, indices.name, 1060 modifiers, '', None) 1061 # At this point, we got something like: 1062 # return_type (type::*name_)(params); 1063 # This is a data member called name_ that is a function pointer. 1064 # With this code: void (sq_type::*field_)(string&); 1065 # We get: name=void return_type=[] parameters=sq_type ... 
field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            # Fallback: treat the last parameter token as the declared name
            # and record the remaining parameter-list tokens as modifiers.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            # Open brace: the function has a body.  Capture the scope's
            # tokens and drop the closing '}'.
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            body = None
            if token.name == '=':
                # Pure virtual method: "= 0" must follow.
                token = self._GetNextToken()
                assert token.token_type == tokenize.CONSTANT, token
                assert token.name == '0', token
                modifiers |= FUNCTION_PURE_VIRTUAL
                token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)

    def _GetReturnTypeAndClassName(self, token_seq):
        """Split a method declaration's tokens into (return type, class name).

        Returns a (return_type_tokens, class_name_tokens) pair; the last
        name in the sequence is assumed to be the class name (heuristic,
        see comments below).
        """
        # Splitting the return type from the class name in a method
        # can be tricky. For example, Return::Type::Is::Hard::To::Find().
        # Where is the return type and where is the class name?
        # The heuristic used is to pull the last name as the class name.
        # This includes all the templated type info.
        # TODO(nnorwitz): if there is only One name like in the
        # example above, punt and assume the last bit is the class name.

        # Ignore a :: prefix, if exists so we can find the first real name.
        i = 0
        if token_seq[0].name == '::':
            i = 1
        # Ignore a :: suffix, if exists.
        # NOTE(review): `end` is used below as an exclusive slice bound, so
        # the last element is token_seq[end-1] -- confirm a trailing '::'
        # really lands at that index.
        end = len(token_seq) - 1
        if token_seq[end-1].name == '::':
            end -= 1

        # Make a copy of the sequence so we can append a sentinel
        # value. This is required for GetName will has to have some
        # terminating condition beyond the last name.
        seq_copy = token_seq[i:end]
        seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
        names = []
        while i < end:
            # Iterate through the sequence parsing out each name.
            # NOTE(review): `next` shadows the builtin; kept unchanged here.
            new_name, next = self.GetName(seq_copy[i:])
            assert new_name, 'Got empty new_name, next=%s' % next
            # We got a pointer or ref. Add it to the name.
            if next and next.token_type == tokenize.SYNTAX:
                new_name.append(next)
            names.append(new_name)
            i += len(new_name)

        # Now that we have the names, it's time to undo what we did.

        # Remove the sentinel value.
        names[-1].pop()
        # Flatten the token sequence for the return type.
        return_type = [e for seq in names[:-1] for e in seq]
        # The class name is the last name.
        class_name = names[-1]
        return return_type, class_name

    # The handle_* methods below are keyword handlers (dispatched by
    # keyword name -- see handle_typedef's getattr() lookup).  Builtin
    # type keywords intentionally do nothing here.
    def handle_bool(self):
        pass

    def handle_char(self):
        pass

    def handle_int(self):
        pass

    def handle_long(self):
        pass

    def handle_short(self):
        pass

    def handle_double(self):
        pass

    def handle_float(self):
        pass

    def handle_void(self):
        pass

    def handle_wchar_t(self):
        pass

    def handle_unsigned(self):
        pass

    def handle_signed(self):
        pass

    def _GetNestedType(self, ctor):
        """Parse a nested type declaration and build a node with `ctor`.

        Used for enums and unions (see handle_enum/handle_union).  Handles
        forward declarations, full definitions, and a variable declared
        directly with the type.
        """
        name = None
        name_tokens, token = self.GetName()
        if name_tokens:
            name = ''.join([t.name for t in name_tokens])

        # Handle forward declarations.
        if token.token_type == tokenize.SYNTAX and token.name == ';':
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        if token.token_type == tokenize.NAME and self._handling_typedef:
            # Inside a typedef: push the name back for the typedef code.
            self._AddBackToken(token)
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Must be the type declaration.
        fields = list(self._GetMatchingChar('{', '}'))
        del fields[-1]                  # Remove trailing '}'.
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            next = self._GetNextToken()
            new_type = ctor(token.start, token.end, name, fields,
                            self.namespace_stack)
            # A name means this is an anonymous type and the name
            # is the variable declaration.
            if next.token_type != tokenize.NAME:
                return new_type
            name = new_type
            token = next

        # Must be variable declaration using the type prefixed with keyword.
        assert token.token_type == tokenize.NAME, token
        return self._CreateVariable(token, token.name, name, [], '', None)

    def handle_struct(self):
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            # '*' or '&' after the name: pointer/reference declaration.
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            # NAME followed by ';': plain variable declaration.
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    # Synthesize a 'struct' token positioned just before the
                    # name so _GetMethod sees a full return type.
                    t0 = name_tokens[0]
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable/method: push everything back and fall through
            # to the normal class/struct parsing below.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)

    def handle_union(self):
        return self._GetNestedType(Union)

    def handle_enum(self):
        return self._GetNestedType(Enum)

    def handle_auto(self):
        # TODO(nnorwitz): warn about using auto? Probably not since it
        # will be reclaimed and useful for C++0x.
        pass

    def handle_register(self):
        pass

    def handle_const(self):
        pass

    def handle_inline(self):
        pass

    def handle_extern(self):
        pass

    def handle_static(self):
        pass

    def handle_virtual(self):
        # What follows must be a method.
        token = token2 = self._GetNextToken()
        if token.name == 'inline':
            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
            token2 = self._GetNextToken()
        if token2.token_type == tokenize.SYNTAX and token2.name == '~':
            # Virtual destructor.
            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
        assert token.token_type == tokenize.NAME or token.name == '::', token
        # Collect the return type and name up to the opening paren, then
        # restore the token(s) we already consumed at the front.
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        return_type_and_name.insert(0, token)
        if token2 is not token:
            return_type_and_name.insert(1, token2)
        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                               None, False)

    def handle_volatile(self):
        pass

    def handle_mutable(self):
        pass

    # Access specifiers just update the builder's current visibility.
    def handle_public(self):
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC

    def handle_protected(self):
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED

    def handle_private(self):
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE

    def handle_friend(self):
        # Capture the whole friend declaration up to the semi-colon.
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        t0 = tokens[0]
        return Friend(t0.start, t0.end, tokens, self.namespace_stack)

    def handle_static_cast(self):
        pass

    def handle_const_cast(self):
        pass

    def handle_dynamic_cast(self):
        pass

    def handle_reinterpret_cast(self):
        pass

    def handle_new(self):
        pass

    def handle_delete(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        return Delete(tokens[0].start, tokens[0].end, tokens)

    def handle_typedef(self):
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            # Dispatch to the matching keyword handler (handle_struct,
            # handle_enum, ...) with the typedef flag set so it knows not
            # to consume the trailing declarator.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        name = tokens.pop()
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)

    def handle_typeid(self):
        pass  # Not needed yet.

    def handle_typename(self):
        pass  # Not needed yet.

    def _GetTemplatedTypes(self):
        """Parse the tokens between <> of a template declaration.

        Returns a dict mapping each template parameter name to a
        (type_name, default) pair; either element may be None.
        """
        result = {}
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1    # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    # Default value for the parameter follows '='.
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        key = tokens[i-1].name
                        type_name = tokens[i-2]

            result[key] = (type_name, default)
        return result

    def handle_template(self):
        """Parse a template declaration (class/struct/friend/method)."""
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '<', token
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        token = self._GetNextToken()
        if token.token_type == tokenize.NAME:
            if token.name == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
            elif token.name == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
            elif token.name == 'friend':
                return self.handle_friend()
        self._AddBackToken(token)
        # Peek ahead to '(' (method) or ';' (variable), then push the
        # tokens back so the real parser can consume them.
        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
        tokens.append(last)
        self._AddBackTokens(tokens)
        if last.name == '(':
            return self.GetMethod(FUNCTION_NONE, templated_types)
        # Must be a variable definition.
        return None

    def handle_true(self):
        pass  # Nothing to do.

    def handle_false(self):
        pass  # Nothing to do.

    def handle_asm(self):
        pass  # Not needed yet.

    def handle_class(self):
        return self._GetClass(Class, VISIBILITY_PRIVATE, None)

    def _GetBases(self):
        """Parse a class's base list; returns (bases, token at '{')."""
        # Get base classes.
        bases = []
        while 1:
            token = self._GetNextToken()
            assert token.token_type == tokenize.NAME, token
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if token.name not in ('public', 'protected', 'private'):
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(token)
            else:
                # Check for virtual inheritance.
                token = self._GetNextToken()
                if token.name != 'virtual':
                    self._AddBackToken(token)
                else:
                    # TODO(nnorwitz): store that we got virtual for this base.
                    pass
            base, next_token = self.GetName()
            bases_ast = self.converter.ToType(base)
            assert len(bases_ast) == 1, bases_ast
            bases.append(bases_ast[0])
            assert next_token.token_type == tokenize.SYNTAX, next_token
            if next_token.name == '{':
                # End of the base list; hand the '{' back to the caller.
                token = next_token
                break
            # Support multiple inheritance.
            assert next_token.name == ',', next_token
        return bases, token

    def _GetClass(self, class_type, visibility, templated_types):
        """Parse a class/struct declaration or definition.

        Returns a class_type node, or a variable/method node for
        declarations like "class Foo* p;" that only use the class name.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration. Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Parse the class body with a fresh builder so visibility and
            # in_class state are scoped to this class.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # "class Foo { ... } instance_name;" -- the definition
                    # doubles as a variable declaration.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, None, body, self.namespace_stack)

    def handle_namespace(self):
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None

    def handle_using(self):
        # Capture the whole using declaration/directive up to ';'.
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert tokens
        return Using(tokens[0].start, tokens[0].end, tokens)

    def handle_explicit(self):
        assert self.in_class
        # Nothing much to do.
        # TODO(nnorwitz): maybe verify the method name == class name.
        # This must be a ctor.
        return self.GetMethod(FUNCTION_CTOR, None)

    def handle_this(self):
        pass  # Nothing to do.

    def handle_operator(self):
        # Pull off the next token(s?) and make that part of the method name.
        pass

    def handle_sizeof(self):
        pass

    def handle_case(self):
        pass

    def handle_switch(self):
        pass

    def handle_default(self):
        # Consume the ':' after 'default' in a switch statement.
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX
        assert token.name == ':'

    def handle_if(self):
        pass

    def handle_else(self):
        pass

    def handle_return(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        if not tokens:
            # Bare "return;" -- no expression tokens.
            return Return(self.current_token.start, self.current_token.end, None)
        return Return(tokens[0].start, tokens[0].end, tokens)

    def handle_goto(self):
        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
        assert len(tokens) == 1, str(tokens)
        return Goto(tokens[0].start, tokens[0].end, tokens[0].name)

    def handle_try(self):
        pass  # Not needed yet.

    def handle_catch(self):
        pass  # Not needed yet.

    def handle_throw(self):
        pass  # Not needed yet.
1636 1637 def handle_while(self): 1638 pass 1639 1640 def handle_do(self): 1641 pass 1642 1643 def handle_for(self): 1644 pass 1645 1646 def handle_break(self): 1647 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1648 1649 def handle_continue(self): 1650 self._IgnoreUpTo(tokenize.SYNTAX, ';') 1651 1652 1653def BuilderFromSource(source, filename): 1654 """Utility method that returns an AstBuilder from source code. 1655 1656 Args: 1657 source: 'C++ source code' 1658 filename: 'file1' 1659 1660 Returns: 1661 AstBuilder 1662 """ 1663 return AstBuilder(tokenize.GetTokens(source), filename) 1664 1665 1666def PrintIndentifiers(filename, should_print): 1667 """Prints all identifiers for a C++ source file. 1668 1669 Args: 1670 filename: 'file1' 1671 should_print: predicate with signature: bool Function(token) 1672 """ 1673 source = utils.ReadFile(filename, False) 1674 if source is None: 1675 sys.stderr.write('Unable to find: %s\n' % filename) 1676 return 1677 1678 #print('Processing %s' % actual_filename) 1679 builder = BuilderFromSource(source, filename) 1680 try: 1681 for node in builder.Generate(): 1682 if should_print(node): 1683 print(node.name) 1684 except KeyboardInterrupt: 1685 return 1686 except: 1687 pass 1688 1689 1690def PrintAllIndentifiers(filenames, should_print): 1691 """Prints all identifiers for each C++ source file in filenames. 1692 1693 Args: 1694 filenames: ['file1', 'file2', ...] 1695 should_print: predicate with signature: bool Function(token) 1696 """ 1697 for path in filenames: 1698 PrintIndentifiers(path, should_print) 1699 1700 1701def main(argv): 1702 for filename in argv[1:]: 1703 source = utils.ReadFile(filename) 1704 if source is None: 1705 continue 1706 1707 print('Processing %s' % filename) 1708 builder = BuilderFromSource(source, filename) 1709 try: 1710 entire_ast = filter(None, builder.Generate()) 1711 except KeyboardInterrupt: 1712 return 1713 except: 1714 # Already printed a warning, print the traceback and continue. 
1715 traceback.print_exc() 1716 else: 1717 if utils.DEBUG: 1718 for ast in entire_ast: 1719 print(ast) 1720 1721 1722if __name__ == '__main__': 1723 main(sys.argv) 1724