1#!/usr/bin/env python
2#
3# Copyright 2007 Neal Norwitz
4# Portions Copyright 2007 Google Inc.
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#      http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""Generate an Abstract Syntax Tree (AST) for C++."""
19
20__author__ = 'nnorwitz@google.com (Neal Norwitz)'
21
22
23# TODO:
24#  * Tokens should never be exported, need to convert to Nodes
25#    (return types, parameters, etc.)
26#  * Handle static class data for templatized classes
27#  * Handle casts (both C++ and C-style)
28#  * Handle conditions and loops (if/else, switch, for, while/do)
29#
30# TODO much, much later:
31#  * Handle #define
32#  * exceptions
33
34
35try:
36    # Python 3.x
37    import builtins
38except ImportError:
39    # Python 2.x
40    import __builtin__ as builtins
41
42import sys
43import traceback
44
45from cpp import keywords
46from cpp import tokenize
47from cpp import utils
48
49
if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier, which lack the reversed() builtin.
    def reversed(seq):
        """Yield the items of seq from the last index down to the first."""
        index = len(seq)
        while index > 0:
            index -= 1
            yield seq[index]
55
if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier, which lack the next() builtin.
    def next(iterator):
        """Advance an iterator via the pre-2.6 protocol method."""
        return iterator.next()
60
61
# Member visibility levels, in C++ declaration order.
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags describing properties of a parsed function/method.  These are
# OR'd together (see Function.modifiers and the _GetMethod calls below).
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80

"""
These are currently unused.  Should really handle these properly at some point.

TYPE_MODIFIER_INLINE   = 0x010000
TYPE_MODIFIER_EXTERN   = 0x020000
TYPE_MODIFIER_STATIC   = 0x040000
TYPE_MODIFIER_CONST    = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE  = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
    }
"""

# Synthetic token type/name used by the builder to track namespace scope;
# AstBuilder.Generate pops the namespace stack when it sees _NAMESPACE_POP.
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
98
99
100# TODO(nnorwitz): use this as a singleton for templated_types, etc
101# where we don't want to create a new empty dict each time.  It is also const.
102class _NullDict(object):
103    __contains__ = lambda self: False
104    keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
105
106
107# TODO(nnorwitz): move AST nodes into a separate module.
class Node(object):
    """Base AST node.

    Carries only the start/end token offsets; subclasses add semantics.
    """

    def __init__(self, start, end):
        # Offsets delimiting this node in the original source.
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    # Intentionally not named __str__; renamed to disable it on the base.
    def XXX__str__(self):
        return self._StringHelper(self.__class__.__name__, '')

    def _StringHelper(self, name, suffix):
        # In debug mode include the token offsets in the rendering.
        if utils.DEBUG:
            return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
        return '%s(%s)' % (name, suffix)

    def __repr__(self):
        return str(self)
141
142
class Define(Node):
    """A preprocessor #define; holds the macro name and its definition."""

    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name
        self.definition = definition

    def __str__(self):
        return self._StringHelper(self.__class__.__name__,
                                  '%s %s' % (self.name, self.definition))
152
153
class Include(Node):
    """An #include directive; system is True for <...> style includes."""

    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename
        self.system = system

    def __str__(self):
        # Render with the bracket style the directive originally used.
        if self.system:
            fmt = '<%s>'
        else:
            fmt = '"%s"'
        return self._StringHelper(self.__class__.__name__, fmt % self.filename)
165
166
class Goto(Node):
    """A goto statement; label is the jump target's name."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.label))
174
175
class Expr(Node):
    """An arbitrary expression, kept as its raw token sequence."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        value = str(self.expr)
        return self._StringHelper(self.__class__.__name__, value)
187
188
class Return(Expr):
    """A return statement; the returned expression tokens are in self.expr."""
    pass
191
192
class Delete(Expr):
    """A delete expression; the operand tokens are in self.expr."""
    pass
195
196
class Friend(Expr):
    """A friend declaration, remembering the namespace it appeared in."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Snapshot the namespace stack so later pushes/pops don't affect us.
        self.namespace = list(namespace)
201
202
class Using(Node):
    """A using declaration/directive; names holds the referenced tokens."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.names))
210
211
class Parameter(Node):
    """A single function parameter: name, Type, and optional default tokens."""

    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        self.type = parameter_type
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return node.name == self.type.name

    def __str__(self):
        suffix = '%s %s' % (self.type, self.name)
        if self.default:
            default_names = [d.name for d in self.default]
            suffix = suffix + ' = ' + ''.join(default_names)
        return self._StringHelper(self.__class__.__name__, suffix)
229
230
class _GenericDeclaration(Node):
    """Base for named declarations that live inside a namespace stack."""

    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        # Copy so later namespace pushes/pops don't mutate this node.
        self.namespace = namespace[:]

    def FullName(self):
        """Return the name qualified by any non-anonymous namespace."""
        if not (self.namespace and self.namespace[-1]):
            return self.name
        return '::'.join(self.namespace) + '::' + self.name

    def _TypeStringHelper(self, suffix):
        # Append the namespace context, showing anonymous namespaces clearly.
        if self.namespace:
            names = []
            for n in self.namespace:
                names.append(n or '<anonymous>')
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)
248
249
250# TODO(nnorwitz): merge with Parameter in some way?
# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    """A variable declaration, optionally carrying an initial value."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.type = var_type
        self.initial_value = initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return node.name == self.type.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        decl = '%s %s' % (self.type, self.name)
        if self.initial_value:
            decl = decl + ' = ' + self.initial_value
        return decl

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())
270
271
class Typedef(_GenericDeclaration):
    """A typedef; alias holds the token sequence of the aliased type."""

    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        for token in self.alias:
            if token is not None and node.name == token.name:
                return True
        return False

    def __str__(self):
        return self._TypeStringHelper('%s, %s' % (self.name, self.alias))
294
295
class _NestedType(_GenericDeclaration):
    """Base for field-bearing nested types (unions and enums)."""

    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        return self._TypeStringHelper('%s, {%s}' % (self.name, self.fields))
310
311
class Union(_NestedType):
    """A union declaration; member tokens are in self.fields."""
    pass
314
315
class Enum(_NestedType):
    """An enum declaration; enumerator tokens are in self.fields."""
    pass
318
319
class Class(_GenericDeclaration):
    """A class declaration or definition, with base classes and body tokens."""

    def __init__(self, start, end, name, bases, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.bases = bases
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        # A pure forward declaration has neither bases nor a body.
        return self.bases is None and self.body is None

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        for token_list in self.bases or ():
            # TODO(nnorwitz): bases are tokens, do name comparision.
            for token in token_list:
                if token.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        name = self.name
        if self.templated_types:
            name = '%s<%s>' % (name, self.templated_types)
        return self._TypeStringHelper(
            '%s, %s, %s' % (name, self.bases, self.body))
353
354
class Struct(Class):
    """A struct; identical to Class except for the node type."""
    pass
357
358
class Function(_GenericDeclaration):
    """A free function declaration or definition.

    The raw return-type and parameter token sequences are converted to AST
    nodes at construction time via TypeConverter.
    """

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        self.modifiers = modifiers
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        # static functions are file-local; anonymous namespaces don't export.
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        for part in self.namespace:
            if part is None:
                return False
        return True

    def Requires(self, node):
        if self.parameters:
            # TODO(nnorwitz): parameters are tokens, do name comparision.
            for parameter in self.parameters:
                if parameter.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        return self._TypeStringHelper(
            '%s %s(%s), 0x%02x, %s' % (self.return_type, self.name,
                                       self.parameters, self.modifiers,
                                       self.body))
396
397
class Method(Function):
    """A Function that belongs to a class; in_class names that class."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
406
407
class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        # No explicit name: promote the last modifier to act as the name.
        # NOTE(review): this pops from (mutates) the caller's modifiers list.
        if not name and modifiers:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference = reference
        self.pointer = pointer
        self.array = array

    def __str__(self):
        pieces = []
        if self.modifiers:
            pieces.append(' '.join(self.modifiers))
            pieces.append(' ')
        pieces.append(str(self.name))
        if self.templated_types:
            pieces.append('<%s>' % self.templated_types)
        if self.reference:
            pieces.append('&')
        if self.pointer:
            pieces.append('*')
        if self.array:
            pieces.append('[]')
        return self._TypeStringHelper(''.join(pieces))

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False
455
456
class TypeConverter(object):
    """Converts raw token sequences into typed AST nodes (Type, Parameter).

    Holds a reference to the caller's namespace stack so created nodes can
    be associated with the namespaces in effect at parse time.
    """

    def __init__(self, namespace_stack):
        # Shared (not copied) reference to the caller's namespace stack.
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Find the '>' matching an already-consumed '<'.

        Args:
          tokens: [Token] the sequence being scanned
          start: int index just past the opening '<'

        Returns:
          (tokens strictly between the angle brackets,
           index just past the closing '>')
        """
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
          [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                               name, templated_types, modifiers,
                               reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            # NOTE(review): '[' sets pointer here, while ToParameters sets
            # array for the same token — confirm this asymmetry is intended.
            elif token.name == '[':
               pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split a declaration token list into its components.

        Args:
          parts: [Token] tokens making up the declaration
          needs_name_removed: bool, True when parts still contains the
            declared name (e.g. a parameter) that must be stripped out

        Returns:
          (name, type_name, templated_types, modifiers, default,
           other_tokens) where name is a str or None, type_name is a str,
          default and other_tokens are token lists.
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    # For "name[] = ..." the name sits before the brackets.
                    if name == ']' and parts[i-2].name == '[':
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                # No '=' found: the trailing NAME token is the declared name.
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert a parameter-list token sequence into [Parameter, ...]."""
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter():
            # default[0] is a None flag appended when '=' was seen below.
            if default:
                del default[0]  # Remove flag.
            end = type_modifiers[-1].end
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        # Track '<'/'>' nesting so commas inside templates don't split
        # parameters.
        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter()
                # Reset all per-parameter state for the next parameter.
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter()
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type node from return-type tokens, or None if empty."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
           self.DeclarationToParts(return_type_seq, False)
        # '&', '*' and '[' end up in other_tokens; recover them as flags.
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Return (start, end) slice indices covering '<'...'>' in names."""
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
674
675class AstBuilder(object):
676    def __init__(self, token_stream, filename, in_class='', visibility=None,
677                 namespace_stack=[]):
678        self.tokens = token_stream
679        self.filename = filename
680        # TODO(nnorwitz): use a better data structure (deque) for the queue.
681        # Switching directions of the "queue" improved perf by about 25%.
682        # Using a deque should be even better since we access from both sides.
683        self.token_queue = []
684        self.namespace_stack = namespace_stack[:]
685        self.in_class = in_class
686        if in_class is None:
687            self.in_class_name_only = None
688        else:
689            self.in_class_name_only = in_class.split('::')[-1]
690        self.visibility = visibility
691        self.in_function = False
692        self.current_token = None
693        # Keep the state whether we are currently handling a typedef or not.
694        self._handling_typedef = False
695
696        self.converter = TypeConverter(self.namespace_stack)
697
698    def HandleError(self, msg, token):
699        printable_queue = list(reversed(self.token_queue[-20:]))
700        sys.stderr.write('Got %s in %s @ %s %s\n' %
701                         (msg, self.filename, token, printable_queue))
702
703    def Generate(self):
704        while 1:
705            token = self._GetNextToken()
706            if not token:
707                break
708
709            # Get the next token.
710            self.current_token = token
711
712            # Dispatch on the next token type.
713            if token.token_type == _INTERNAL_TOKEN:
714                if token.name == _NAMESPACE_POP:
715                    self.namespace_stack.pop()
716                continue
717
718            try:
719                result = self._GenerateOne(token)
720                if result is not None:
721                    yield result
722            except:
723                self.HandleError('exception', token)
724                raise
725
726    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
727                        ref_pointer_name_seq, templated_types, value=None):
728        reference = '&' in ref_pointer_name_seq
729        pointer = '*' in ref_pointer_name_seq
730        array = '[' in ref_pointer_name_seq
731        var_type = Type(pos_token.start, pos_token.end, type_name,
732                        templated_types, type_modifiers,
733                        reference, pointer, array)
734        return VariableDeclaration(pos_token.start, pos_token.end,
735                                   name, var_type, value, self.namespace_stack)
736
    def _GenerateOne(self, token):
        """Dispatch a single token to the right parser and return its node.

        Returns an AST node (Include, Define, VariableDeclaration, the
        result of a handle_* method or _GetMethod), or None when the token
        produces no node.
        """
        if token.token_type == tokenize.NAME:
            # Language keywords (but not builtin type names) each have a
            # dedicated handle_<keyword> method on this class.
            if (keywords.IsKeyword(token.name) and
                not keywords.IsBuiltinType(token.name)):
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                # NOTE: 'next' here shadows the builtin within this scope.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    # Synthesize a ';' so the data-handling branch below runs.
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Strip the '<...>' names; they are carried separately.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                # Push everything back except the first token and re-dispatch
                # to a handle_<name> method if one exists.
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                    token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                # Split "NAME VALUE" at the first whitespace run.
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                # Skip code that is compiled out via "#if 0" / "#if (0)".
                if condition.startswith('0') or condition.startswith('(0)'):
                    self._SkipIf0Blocks()
        return None
847
848    def _GetTokensUpTo(self, expected_token_type, expected_token):
849        return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
850
851    def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
852        last_token = self._GetNextToken()
853        tokens = []
854        while (last_token.token_type != expected_token_type or
855               last_token.name not in expected_tokens):
856            tokens.append(last_token)
857            last_token = self._GetNextToken()
858        return tokens, last_token
859
860    # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary.
861    def _IgnoreUpTo(self, token_type, token):
862        unused_tokens = self._GetTokensUpTo(token_type, token)
863
864    def _SkipIf0Blocks(self):
865        count = 1
866        while 1:
867            token = self._GetNextToken()
868            if token.token_type != tokenize.PREPROCESSOR:
869                continue
870
871            name = token.name[1:].lstrip()
872            if name.startswith('endif'):
873                count -= 1
874                if count == 0:
875                    break
876            elif name.startswith('if'):
877                count += 1
878
879    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
880        if GetNextToken is None:
881            GetNextToken = self._GetNextToken
882        # Assumes the current token is open_paren and we will consume
883        # and return up to the close_paren.
884        count = 1
885        token = GetNextToken()
886        while 1:
887            if token.token_type == tokenize.SYNTAX:
888                if token.name == open_paren:
889                    count += 1
890                elif token.name == close_paren:
891                    count -= 1
892                    if count == 0:
893                        break
894            yield token
895            token = GetNextToken()
896        yield token
897
    def _GetParameters(self):
        """Return an iterator of parameter tokens, up to and incl. the ')'."""
        return self._GetMatchingChar('(', ')')
900
    def GetScope(self):
        """Return an iterator of the tokens of a {...} scope, incl. the '}'."""
        return self._GetMatchingChar('{', '}')
903
904    def _GetNextToken(self):
905        if self.token_queue:
906            return self.token_queue.pop()
907        return next(self.tokens)
908
909    def _AddBackToken(self, token):
910        if token.whence == tokenize.WHENCE_STREAM:
911            token.whence = tokenize.WHENCE_QUEUE
912            self.token_queue.insert(0, token)
913        else:
914            assert token.whence == tokenize.WHENCE_QUEUE, token
915            self.token_queue.append(token)
916
917    def _AddBackTokens(self, tokens):
918        if tokens:
919            if tokens[-1].whence == tokenize.WHENCE_STREAM:
920                for token in tokens:
921                    token.whence = tokenize.WHENCE_QUEUE
922                self.token_queue[:0] = reversed(tokens)
923            else:
924                assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
925                self.token_queue.extend(reversed(tokens))
926
    def GetName(self, seq=None):
        """Parse one (possibly qualified and/or templated) name.

        Args:
          seq: optional token sequence to read from instead of the stream.

        Returns:
          ([tokens], next_token_info) -- the name's tokens and the first
          token that is not part of the name.
        """
        GetNextToken = self._GetNextToken
        if seq is not None:
            it = iter(seq)
            GetNextToken = lambda: next(it)
        next_token = GetNextToken()
        tokens = []
        last_token_was_name = False
        # A name is NAME tokens glued with '::' plus template args after '<'.
        while (next_token.token_type == tokenize.NAME or
               (next_token.token_type == tokenize.SYNTAX and
                next_token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if last_token_was_name and next_token.token_type == tokenize.NAME:
                break
            last_token_was_name = next_token.token_type == tokenize.NAME
            tokens.append(next_token)
            # Handle templated names.
            if next_token.name == '<':
                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
                last_token_was_name = True
            next_token = GetNextToken()
        return tokens, next_token
951
952    def GetMethod(self, modifiers, templated_types):
953        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
954        assert len(return_type_and_name) >= 1
955        return self._GetMethod(return_type_and_name, modifiers, templated_types,
956                               False)
957
    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Parse the remainder of a function/method declaration/definition.

        Args:
          return_type_and_name: tokens already consumed for the return type
            and the name; the name is the last token.
          modifiers: FUNCTION_* bit flags collected so far.
          templated_types: template parameter dict or None.
          get_paren: when True, first consume tokens up to the opening '('
            (used for dtors where only '~'/name was consumed).

        Returns:
          A Method, Function, or a variable node for function pointers
          that merely look like method declarations.
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # Rebuild 'operator[]' into a single synthetic NAME token.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        # indices is only used for its start/end positions below.
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]              # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]          # Remove trailing ')'.

        # Consume trailing modifiers (const, throw(), __attribute__, macros).
        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz):  assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            body = None
            if token.name == '=':
                # Pure virtual: '= 0'.
                token = self._GetNextToken()
                assert token.token_type == tokenize.CONSTANT, token
                assert token.name == '0', token
                modifiers |= FUNCTION_PURE_VIRTUAL
                token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                         self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)
1105
1106    def _GetReturnTypeAndClassName(self, token_seq):
1107        # Splitting the return type from the class name in a method
1108        # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
1109        # Where is the return type and where is the class name?
1110        # The heuristic used is to pull the last name as the class name.
1111        # This includes all the templated type info.
1112        # TODO(nnorwitz): if there is only One name like in the
1113        # example above, punt and assume the last bit is the class name.
1114
1115        # Ignore a :: prefix, if exists so we can find the first real name.
1116        i = 0
1117        if token_seq[0].name == '::':
1118            i = 1
1119        # Ignore a :: suffix, if exists.
1120        end = len(token_seq) - 1
1121        if token_seq[end-1].name == '::':
1122            end -= 1
1123
1124        # Make a copy of the sequence so we can append a sentinel
1125        # value. This is required for GetName will has to have some
1126        # terminating condition beyond the last name.
1127        seq_copy = token_seq[i:end]
1128        seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
1129        names = []
1130        while i < end:
1131            # Iterate through the sequence parsing out each name.
1132            new_name, next = self.GetName(seq_copy[i:])
1133            assert new_name, 'Got empty new_name, next=%s' % next
1134            # We got a pointer or ref.  Add it to the name.
1135            if next and next.token_type == tokenize.SYNTAX:
1136                new_name.append(next)
1137            names.append(new_name)
1138            i += len(new_name)
1139
1140        # Now that we have the names, it's time to undo what we did.
1141
1142        # Remove the sentinel value.
1143        names[-1].pop()
1144        # Flatten the token sequence for the return type.
1145        return_type = [e for seq in names[:-1] for e in seq]
1146        # The class name is the last name.
1147        class_name = names[-1]
1148        return return_type, class_name
1149
1150    def handle_bool(self):
1151        pass
1152
1153    def handle_char(self):
1154        pass
1155
1156    def handle_int(self):
1157        pass
1158
1159    def handle_long(self):
1160        pass
1161
1162    def handle_short(self):
1163        pass
1164
1165    def handle_double(self):
1166        pass
1167
1168    def handle_float(self):
1169        pass
1170
1171    def handle_void(self):
1172        pass
1173
1174    def handle_wchar_t(self):
1175        pass
1176
1177    def handle_unsigned(self):
1178        pass
1179
1180    def handle_signed(self):
1181        pass
1182
    def _GetNestedType(self, ctor):
        """Parse a nested type (enum/union) after its keyword was consumed.

        Args:
          ctor: node constructor (e.g. Enum or Union), invoked as
            ctor(start, end, name, fields, namespace_stack).

        Returns:
          The constructed type node, or a variable node when the type is
          used inline to declare a variable.
        """
        name = None
        name_tokens, token = self.GetName()
        if name_tokens:
            name = ''.join([t.name for t in name_tokens])

        # Handle forward declarations.
        if token.token_type == tokenize.SYNTAX and token.name == ';':
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Inside a typedef, a NAME here is the alias; push it back and
        # return just the (body-less) type node.
        if token.token_type == tokenize.NAME and self._handling_typedef:
            self._AddBackToken(token)
            return ctor(token.start, token.end, name, None,
                        self.namespace_stack)

        # Must be the type declaration.
        fields = list(self._GetMatchingChar('{', '}'))
        del fields[-1]                  # Remove trailing '}'.
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            next = self._GetNextToken()
            new_type = ctor(token.start, token.end, name, fields,
                            self.namespace_stack)
            # A name means this is an anonymous type and the name
            # is the variable declaration.
            if next.token_type != tokenize.NAME:
                return new_type
            name = new_type
            token = next

        # Must be variable declaration using the type prefixed with keyword.
        assert token.token_type == tokenize.NAME, token
        return self._CreateVariable(token, token.name, name, [], '', None)
1216
    def handle_struct(self):
        """Parse a struct declaration, variable, or method returning a struct."""
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            # 'struct Foo *x' / 'struct Foo &x' style declaration?
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            # 'struct Foo x;' style declaration?
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    # Synthesize a 'struct' token positioned just before the
                    # name so the method signature is complete.
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable/method; push everything back and parse as a class.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1252
    def handle_union(self):
        """Parse a union declaration/definition."""
        return self._GetNestedType(Union)
1255
    def handle_enum(self):
        """Parse an enum declaration/definition."""
        return self._GetNestedType(Enum)
1258
1259    def handle_auto(self):
1260        # TODO(nnorwitz): warn about using auto?  Probably not since it
1261        # will be reclaimed and useful for C++0x.
1262        pass
1263
1264    def handle_register(self):
1265        pass
1266
1267    def handle_const(self):
1268        pass
1269
1270    def handle_inline(self):
1271        pass
1272
1273    def handle_extern(self):
1274        pass
1275
1276    def handle_static(self):
1277        pass
1278
    def handle_virtual(self):
        """Parse the method declaration following the 'virtual' keyword."""
        # What follows must be a method.
        token = token2 = self._GetNextToken()
        if token.name == 'inline':
            # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
            token2 = self._GetNextToken()
        if token2.token_type == tokenize.SYNTAX and token2.name == '~':
            # Virtual destructor.
            return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
        assert token.token_type == tokenize.NAME or token.name == '::', token
        return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
        # Re-insert the token(s) consumed above so the signature is complete.
        return_type_and_name.insert(0, token)
        if token2 is not token:
            return_type_and_name.insert(1, token2)
        return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                               None, False)
1294
1295    def handle_volatile(self):
1296        pass
1297
1298    def handle_mutable(self):
1299        pass
1300
    def handle_public(self):
        """Switch current access specifier to public; only valid in a class."""
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC
1304
    def handle_protected(self):
        """Switch current access specifier to protected; only valid in a class."""
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED
1308
    def handle_private(self):
        """Switch current access specifier to private; only valid in a class."""
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE
1312
1313    def handle_friend(self):
1314        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1315        assert tokens
1316        t0 = tokens[0]
1317        return Friend(t0.start, t0.end, tokens, self.namespace_stack)
1318
1319    def handle_static_cast(self):
1320        pass
1321
1322    def handle_const_cast(self):
1323        pass
1324
1325    def handle_dynamic_cast(self):
1326        pass
1327
1328    def handle_reinterpret_cast(self):
1329        pass
1330
1331    def handle_new(self):
1332        pass
1333
1334    def handle_delete(self):
1335        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1336        assert tokens
1337        return Delete(tokens[0].start, tokens[0].end, tokens)
1338
    def handle_typedef(self):
        """Parse a typedef declaration and return a Typedef node."""
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        # The alias name is normally the last token before ';'.
        name = tokens.pop()
        # indices is only used for its start/end positions.
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)
1378
1379    def handle_typeid(self):
1380        pass  # Not needed yet.
1381
1382    def handle_typename(self):
1383        pass  # Not needed yet.
1384
    def _GetTemplatedTypes(self):
        """Parse the <...> of a template declaration (after the '<').

        Returns:
          dict mapping each template parameter name to a
          (type_name_token_or_None, default_tokens_or_None) pair.
        """
        result = {}
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1    # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            # Skip 'typename'/'class' keywords and separators.
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    # Parameter with a default value: name = default.
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        key = tokens[i-1].name
                        type_name = tokens[i-2]

            result[key] = (type_name, default)
        return result
1411
    def handle_template(self):
        """Parse a template declaration (class, struct, friend, or function)."""
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '<', token
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        token = self._GetNextToken()
        if token.token_type == tokenize.NAME:
            if token.name == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
            elif token.name == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
            elif token.name == 'friend':
                return self.handle_friend()
        self._AddBackToken(token)
        # Peek ahead: a '(' before ';' means a templated function/method.
        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
        tokens.append(last)
        self._AddBackTokens(tokens)
        if last.name == '(':
            return self.GetMethod(FUNCTION_NONE, templated_types)
        # Must be a variable definition.
        return None
1434
1435    def handle_true(self):
1436        pass  # Nothing to do.
1437
1438    def handle_false(self):
1439        pass  # Nothing to do.
1440
1441    def handle_asm(self):
1442        pass  # Not needed yet.
1443
    def handle_class(self):
        """Parse a class declaration/definition (default visibility: private)."""
        return self._GetClass(Class, VISIBILITY_PRIVATE, None)
1446
    def _GetBases(self):
        """Parse the base-class list after ':' in a class definition.

        Returns:
          (bases, token) where bases is a list of base-type AST nodes and
          token is the '{' that opens the class body.
        """
        # Get base classes.
        bases = []
        while 1:
            token = self._GetNextToken()
            assert token.token_type == tokenize.NAME, token
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if token.name not in ('public', 'protected', 'private'):
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(token)
            else:
                # Check for virtual inheritance.
                token = self._GetNextToken()
                if token.name != 'virtual':
                    self._AddBackToken(token)
                else:
                    # TODO(nnorwitz): store that we got virtual for this base.
                    pass
            base, next_token = self.GetName()
            bases_ast = self.converter.ToType(base)
            assert len(bases_ast) == 1, bases_ast
            bases.append(bases_ast[0])
            assert next_token.token_type == tokenize.SYNTAX, next_token
            if next_token.name == '{':
                token = next_token
                break
            # Support multiple inheritance.
            assert next_token.name == ',', next_token
        return bases, token
1478
    def _GetClass(self, class_type, visibility, templated_types):
        """Parse a class/struct after its keyword was consumed.

        Args:
          class_type: node constructor (Class or Struct).
          visibility: default member visibility (VISIBILITY_*).
          templated_types: template parameter dict or None.

        Returns:
          A class/struct node, a variable node for inline declarations,
          or a method node for declarations that turn out to be methods.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            # Anonymous class/struct: the next token is syntax (e.g. '{').
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            # Skip any macro (e.g. storage class specifiers) after the
            # 'class' keyword.
            next_token = self._GetNextToken()
            if next_token.token_type == tokenize.NAME:
                self._AddBackToken(next_token)
            else:
                self._AddBackTokens([class_token, next_token])
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Recursively build the AST for the class body.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # 'class Foo { ... } var;' declares a variable of the
                    # just-defined class type.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, None, body, self.namespace_stack)
1550
    def handle_namespace(self):
        """Parse a namespace (named, anonymous, or alias) declaration."""
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None
1577
1578    def handle_using(self):
1579        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1580        assert tokens
1581        return Using(tokens[0].start, tokens[0].end, tokens)
1582
    def handle_explicit(self):
        """Parse the constructor following the 'explicit' keyword."""
        assert self.in_class
        # Nothing much to do.
        # TODO(nnorwitz): maybe verify the method name == class name.
        # This must be a ctor.
        return self.GetMethod(FUNCTION_CTOR, None)
1589
    def handle_this(self):
        """Handle the 'this' keyword; no AST node is produced."""
        pass  # Nothing to do.
1592
    def handle_operator(self):
        """Handle the 'operator' keyword; currently a no-op."""
        # Pull off the next token(s?) and make that part of the method name.
        pass
1596
    def handle_sizeof(self):
        """Handle the 'sizeof' keyword; no AST node is produced."""
        pass
1599
    def handle_case(self):
        """Handle a 'case' label; no AST node is produced."""
        pass
1602
    def handle_switch(self):
        """Handle a 'switch' statement; no AST node is produced."""
        pass
1605
1606    def handle_default(self):
1607        token = self._GetNextToken()
1608        assert token.token_type == tokenize.SYNTAX
1609        assert token.name == ':'
1610
    def handle_if(self):
        """Handle an 'if' statement; no AST node is produced."""
        pass
1613
    def handle_else(self):
        """Handle an 'else' clause; no AST node is produced."""
        pass
1616
1617    def handle_return(self):
1618        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1619        if not tokens:
1620            return Return(self.current_token.start, self.current_token.end, None)
1621        return Return(tokens[0].start, tokens[0].end, tokens)
1622
1623    def handle_goto(self):
1624        tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
1625        assert len(tokens) == 1, str(tokens)
1626        return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
1627
    def handle_try(self):
        """Handle a 'try' block; no AST node is produced."""
        pass  # Not needed yet.
1630
    def handle_catch(self):
        """Handle a 'catch' clause; no AST node is produced."""
        pass  # Not needed yet.
1633
    def handle_throw(self):
        """Handle a 'throw' expression; no AST node is produced."""
        pass  # Not needed yet.
1636
    def handle_while(self):
        """Handle a 'while' loop; no AST node is produced."""
        pass
1639
    def handle_do(self):
        """Handle a 'do' loop; no AST node is produced."""
        pass
1642
    def handle_for(self):
        """Handle a 'for' loop; no AST node is produced."""
        pass
1645
    def handle_break(self):
        """Handle a 'break' statement by skipping tokens up to the ';'."""
        self._IgnoreUpTo(tokenize.SYNTAX, ';')
1648
    def handle_continue(self):
        """Handle a 'continue' statement by skipping tokens up to the ';'."""
        self._IgnoreUpTo(tokenize.SYNTAX, ';')
1651
1652
def BuilderFromSource(source, filename):
    """Create an AstBuilder for the given C++ source text.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    token_stream = tokenize.GetTokens(source)
    return AstBuilder(token_stream, filename)
1664
1665
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Parsing is best-effort: skip files we cannot handle.  Catching
        # Exception (not a bare 'except') avoids swallowing SystemExit and
        # other BaseExceptions.
        pass
1688
1689
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    # Delegate the per-file work; errors are handled inside the helper.
    for filename in filenames:
        PrintIndentifiers(filename, should_print)
1699
1700
def main(argv):
    """Parse each C++ file named in argv and dump its AST when DEBUG is set.

    Args:
      argv: command-line arguments; argv[1:] are the files to process.
    """
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            # Materialize the AST here: filter() is lazy in Python 3, so
            # without list() any parse error raised by Generate() would
            # escape this try block and surface uncaught in the loop below.
            entire_ast = list(filter(None, builder.Generate()))
        except KeyboardInterrupt:
            return
        except Exception:
            # Already printed a warning, print the traceback and continue.
            # (Exception, not bare 'except', so SystemExit still propagates.)
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)
1720
1721
# Script entry point: parse every file named on the command line.
if __name__ == '__main__':
    main(sys.argv)
1724