15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#!/usr/bin/env python
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Licensed under the Apache License, Version 2.0 (the "License");
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# you may not use this file except in compliance with the License.
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# You may obtain a copy of the License at
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#      http://www.apache.org/licenses/LICENSE-2.0
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Unless required by applicable law or agreed to in writing, software
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# distributed under the License is distributed on an "AS-IS" BASIS,
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# See the License for the specific language governing permissions and
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# limitations under the License.
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)"""Classes to represent tokens and positions within them."""
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)__author__ = ('robbyw@google.com (Robert Walker)',
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              'ajp@google.com (Andy Perelson)')
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
class TokenType(object):
  """Language-independent token type identifiers.

  Each constant is a plain string; Token.type is compared against these
  values with ordinary equality.
  """

  NORMAL = 'normal'
  WHITESPACE = 'whitespace'
  BLANK_LINE = 'blank line'
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
class Token(object):
  """Token class for intelligent text splitting.

  The token class represents a string of characters and an identifying type.
  Tokens form a doubly linked list through their `previous` / `next`
  attributes; iterating a token walks that list starting at the token itself.

  Attributes:
    type: The type of token.
    string: The characters the token comprises.
    length: The length of the token.
    line: The text of the line the token is found in.
    line_number: The number of the line the token is found in.
    orig_line_number: The line number of the original file this token comes
        from, or None for tokens created after tokenization.
    values: Dictionary of values returned from the tokens regex match.
    is_deleted: Flag initialised to False by the constructor.  This class
        never changes it itself (presumably set by code that removes tokens
        from the stream -- confirm against callers).
    previous: The token before this one.
    next: The token after this one.
    start_index: The character index in the line where this token starts.
    attached_object: Object containing more information about this token.
    metadata: Object containing metadata about this token.  Must be added by
        a separate metadata pass.
  """

  # String types used by IsAnyType to tell "several type names" apart from
  # "one collection of type names".  Resolved once at class creation so the
  # check works on Python 2 (str and unicode) as well as on Python 3, where
  # `basestring` no longer exists.
  try:
    _STRING_TYPES = (basestring,)  # noqa: F821 -- Python 2 only.
  except NameError:
    _STRING_TYPES = (str,)

  def __init__(self, string, token_type, line, line_number, values=None,
               orig_line_number=None):
    """Creates a new Token object.

    Args:
      string: The string of input the token contains.
      token_type: The type of token.
      line: The text of the line this token is in.
      line_number: The line number of the token.
      values: A dict of named values within the token.  For instance, a
        function declaration may have a value called 'name' which captures the
        name of the function.
      orig_line_number: The line number of the original file this token comes
        from. This should be only set during the tokenization process. For newly
        created error fix tokens after that, it should be None.
    """
    self.type = token_type
    self.string = string
    self.length = len(string)
    self.line = line
    self.line_number = line_number
    self.orig_line_number = orig_line_number
    self.values = values
    self.is_deleted = False

    # These parts can only be computed when the file is fully tokenized
    self.previous = None
    self.next = None
    self.start_index = None

    # This part is set in statetracker.py
    # TODO(robbyw): Wrap this in to metadata
    self.attached_object = None

    # This part is set in *metadatapass.py
    self.metadata = None

  def IsFirstInLine(self):
    """Tests if this token is the first token in its line.

    Returns:
      Whether the token is the first token in its line.
    """
    return not self.previous or self.previous.line_number != self.line_number

  def IsLastInLine(self):
    """Tests if this token is the last token in its line.

    Returns:
      Whether the token is the last token in its line.
    """
    return not self.next or self.next.line_number != self.line_number

  def IsType(self, token_type):
    """Tests if this token is of the given type.

    Args:
      token_type: The type to test for.

    Returns:
      True if the type of this token matches the type passed in.
    """
    return self.type == token_type

  def IsAnyType(self, *token_types):
    """Tests if this token is any of the given types.

    Args:
      token_types: The types to check.  Also accepts a single array.

    Returns:
      True if the type of this token is any of the types passed in.  False
      when no types are passed at all.
    """
    if not token_types:
      # Nothing to match against; previously this raised IndexError.
      return False

    first = token_types[0]
    if isinstance(first, self._STRING_TYPES):
      # Called as IsAnyType('a', 'b', ...).
      return self.type in token_types
    # Called as IsAnyType(['a', 'b', ...]): the first argument is the
    # collection of candidate types.
    return self.type in first

  def __repr__(self):
    """Returns a debug representation of the token.

    The line number is formatted with %s rather than %d so that a token whose
    line_number is None can still be printed; integer line numbers render
    exactly as before.
    """
    return '<Token: %s, "%s", %r, %s, %r>' % (self.type, self.string,
                                              self.values, self.line_number,
                                              self.metadata)

  def __iter__(self):
    """Returns a token iterator.

    Yields this token and then every token reachable through `next`.
    """
    node = self
    while node:
      yield node
      node = node.next

  def __reversed__(self):
    """Returns a reverse-direction token iterator.

    Yields this token and then every token reachable through `previous`.
    """
    node = self
    while node:
      yield node
      node = node.previous
146