#!/usr/bin/env python
#
# Copyright 2008 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Classes to represent tokens and positions within them."""

__author__ = ('robbyw@google.com (Robert Walker)',
              'ajp@google.com (Andy Perelson)')

# `basestring` only exists on Python 2 (where it covers str and unicode).
# Fall back to `str` so the string check in Token.IsAnyType also works on
# Python 3 instead of raising NameError.
try:
    _STRING_TYPES = basestring  # noqa: F821
except NameError:
    _STRING_TYPES = str


class TokenType(object):
    """Token types common to all languages."""
    NORMAL = 'normal'
    WHITESPACE = 'whitespace'
    BLANK_LINE = 'blank line'


class Token(object):
    """Token class for intelligent text splitting.

    The token class represents a string of characters and an identifying
    type. Tokens form a doubly linked list through `previous` and `next`;
    iterating a token walks forward from it, and `reversed()` walks backward.

    Attributes:
      type: The type of token.
      string: The characters the token comprises.
      length: The length of the token.
      line: The text of the line the token is found in.
      line_number: The number of the line the token is found in.
      values: Dictionary of values returned from the token's regex match.
      previous: The token before this one.
      next: The token after this one.
      start_index: The character index in the line where this token starts.
      attached_object: Object containing more information about this token.
      metadata: Object containing metadata about this token.  Must be added
          by a separate metadata pass.
    """

    def __init__(self, string, token_type, line, line_number, values=None):
        """Creates a new Token object.

        Args:
          string: The string of input the token contains.
          token_type: The type of token.
          line: The text of the line this token is in.
          line_number: The line number of the token.
          values: A dict of named values within the token.  For instance, a
              function declaration may have a value called 'name' which
              captures the name of the function.
        """
        self.type = token_type
        self.string = string
        self.length = len(string)
        self.line = line
        self.line_number = line_number
        self.values = values

        # These parts can only be computed when the file is fully tokenized
        self.previous = None
        self.next = None
        self.start_index = None

        # This part is set in statetracker.py
        # TODO(robbyw): Wrap this in to metadata
        self.attached_object = None

        # This part is set in *metadatapass.py
        self.metadata = None

    def IsFirstInLine(self):
        """Tests if this token is the first token in its line.

        Returns:
          Whether the token is the first token in its line: True when there
          is no previous token or the previous token is on another line.
        """
        previous = self.previous
        return not previous or previous.line_number != self.line_number

    def IsLastInLine(self):
        """Tests if this token is the last token in its line.

        Returns:
          Whether the token is the last token in its line: True when there
          is no next token or the next token is on another line.
        """
        following = self.next
        return not following or following.line_number != self.line_number

    def IsType(self, token_type):
        """Tests if this token is of the given type.

        Args:
          token_type: The type to test for.

        Returns:
          True if the type of this token matches the type passed in.
        """
        return self.type == token_type

    def IsAnyType(self, *token_types):
        """Tests if this token is any of the given types.

        Args:
          *token_types: The types to check.  Also accepts a single array
              (any non-string container) as the only argument.

        Returns:
          True if the type of this token is any of the types passed in.
          False when no types are passed at all.
        """
        # Nothing to match against; previously this raised IndexError.
        if not token_types:
            return False

        first = token_types[0]
        # A non-string first argument is treated as the container of types.
        if not isinstance(first, _STRING_TYPES):
            return self.type in first
        return self.type in token_types

    def __repr__(self):
        return '<Token: %s, "%s", %r, %d, %r>' % (
            self.type, self.string, self.values, self.line_number,
            self.metadata)

    def __iter__(self):
        """Returns a token iterator.

        Yields this token, then each following token via `next`.
        """
        node = self
        while node:
            yield node
            node = node.next

    def __reversed__(self):
        """Returns a reverse-direction token iterator.

        Yields this token, then each preceding token via `previous`.
        """
        node = self
        while node:
            yield node
            node = node.previous