"""ANTLR3 runtime package"""

# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2008 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

import codecs
from StringIO import StringIO

from antlr3.constants import DEFAULT_CHANNEL, EOF
from antlr3.tokens import Token, CommonToken


############################################################################
#
# basic interfaces
#   IntStream
#    +- CharStream
#    \- TokenStream
#
# subclasses must implement all methods
#
############################################################################

class IntStream(object):
    """
    @brief Base interface for streams of integer values.

    A simple stream of integers used when all I care about is the char
    or token type sequence (such as interpretation).
    """

    def consume(self):
        """Advance the input cursor by one symbol."""

        raise NotImplementedError


    def LA(self, i):
        """Get int at current input pointer + i ahead where i=1 is next int.

        Negative indexes are allowed.  LA(-1) is previous token (token
        just matched).  LA(-i) where i is before first token should
        yield -1, invalid char / EOF.
        """

        raise NotImplementedError


    def mark(self):
        """
        Tell the stream to start buffering if it hasn't already.  Return
        current input position, index(), or some other marker so that
        when passed to rewind() you get back to the same spot.
        rewind(mark()) should not affect the input cursor.  The Lexer
        tracks line/col info as well as input index so its markers are
        not pure input indexes.  Same for tree node streams.
        """

        raise NotImplementedError


    def index(self):
        """
        Return the current input symbol index 0..n where n indicates the
        last symbol has been read.  The index is the symbol about to be
        read not the most recently read symbol.
        """

        raise NotImplementedError


    def rewind(self, marker=None):
        """
        Reset the stream so that next call to index would return marker.
        The marker will usually be index() but it doesn't have to be.  It's
        just a marker to indicate what state the stream was in.  This is
        essentially calling release() and seek().  If there are markers
        created after this marker argument, this routine must unroll them
        like a stack.  Assume the state the stream was in when this marker
        was created.

        If marker is None:
        Rewind to the input position of the last marker.
        Used currently only after a cyclic DFA and just
        before starting a sem/syn predicate to get the
        input position back to the start of the decision.
        Do not "pop" the marker off the state.  mark(i)
        and rewind(i) should balance still. It is
        like invoking rewind(last marker) but it should not "pop"
        the marker off.  It's like seek(last marker's input position).
        """

        raise NotImplementedError


    def release(self, marker=None):
        """
        You may want to commit to a backtrack but don't want to force the
        stream to keep bookkeeping objects around for a marker that is
        no longer necessary.  This will have the same behavior as
        rewind() except it releases resources without the backward seek.
        This must throw away resources for all markers back to the marker
        argument.  So if you're nested 5 levels of mark(), and then release(2)
        you have to release resources for depths 2..5.
        """

        raise NotImplementedError


    def seek(self, index):
        """
        Set the input cursor to the position indicated by index.  This is
        normally used to seek ahead in the input stream.  No buffering is
        required to do this unless you know your stream will use seek to
        move backwards such as when backtracking.

        This is different from rewind in its multi-directional
        requirement and in that its argument is strictly an input cursor
        (index).

        For char streams, seeking forward must update the stream state such
        as line number.  For seeking backwards, you will be presumably
        backtracking using the mark/rewind mechanism that restores state and
        so this method does not need to update state when seeking backwards.

        Currently, this method is only used for efficient backtracking using
        memoization, but in the future it may be used for incremental parsing.

        The index is 0..n-1.  A seek to position i means that LA(1) will
        return the ith symbol.  So, seeking to 0 means LA(1) will return the
        first element in the stream.
        """

        raise NotImplementedError


    def size(self):
        """
        Only makes sense for streams that buffer everything up probably, but
        might be useful to display the entire stream or for testing.  This
        value includes a single EOF.
        """

        raise NotImplementedError


    def getSourceName(self):
        """
        Where are you getting symbols from?  Normally, implementations will
        pass the buck all the way to the lexer who can ask its input stream
        for the file name or whatever.
        """

        raise NotImplementedError


class CharStream(IntStream):
    """
    @brief A source of characters for an ANTLR lexer.

    This is an abstract class that must be implemented by a subclass.

    """

    # pylint does not realize that this is an interface, too
    #pylint: disable-msg=W0223

    EOF = -1


    def substring(self, start, stop):
        """
        For infinite streams, you don't need this; primarily I'm providing
        a useful interface for action code.  Just make sure actions don't
        use this on streams that don't support it.
        """

        raise NotImplementedError


    def LT(self, i):
        """
        Get the ith character of lookahead.  This is the same usually as
        LA(i).  This will be used for labels in the generated
        lexer code.  I'd prefer to return a char here type-wise, but it's
        probably better to be 32-bit clean and be consistent with LA.
        """

        raise NotImplementedError


    def getLine(self):
        """ANTLR tracks the line information automatically"""

        raise NotImplementedError


    def setLine(self, line):
        """
        Because this stream can rewind, we need to be able to reset the line
        """

        raise NotImplementedError


    def getCharPositionInLine(self):
        """
        The index of the character relative to the beginning of the line 0..n-1
        """

        raise NotImplementedError


    def setCharPositionInLine(self, pos):
        """Set the character position within the current line."""

        raise NotImplementedError


class TokenStream(IntStream):
    """

    @brief A stream of tokens accessing tokens from a TokenSource

    This is an abstract class that must be implemented by a subclass.

    """

    # pylint does not realize that this is an interface, too
    #pylint: disable-msg=W0223

    def LT(self, k):
        """
        Get Token at current input pointer + i ahead where i=1 is next Token.
        i<0 indicates tokens in the past.  So -1 is previous token and -2 is
        two tokens ago. LT(0) is undefined.  For i>=n, return Token.EOFToken.
        Return None for LT(0) and any index that results in an absolute address
        that is negative.
        """

        raise NotImplementedError


    def range(self):
        """
        How far ahead has the stream been asked to look?  The return
        value is a valid index from 0..n-1.
        """

        raise NotImplementedError


    def get(self, i):
        """
        Get a token at an absolute index i; 0..n-1.  This is really only
        needed for profiling and debugging and token stream rewriting.
        If you don't want to buffer up tokens, then this method makes no
        sense for you.  Naturally you can't use the rewrite stream feature.
        I believe DebugTokenStream can easily be altered to not use
        this method, removing the dependency.
        """

        raise NotImplementedError


    def getTokenSource(self):
        """
        Where is this stream pulling tokens from?  This is not the name, but
        the object that provides Token objects.
        """

        raise NotImplementedError


    def toString(self, start=None, stop=None):
        """
        Return the text of all tokens from start to stop, inclusive.
        If the stream does not buffer all the tokens then it can just
        return "" or None;  Users should not access $ruleLabel.text in
        an action of course in that case.

        Because the user is not required to use a token with an index stored
        in it, we must provide a means for two token objects themselves to
        indicate the start/end location.  Most often this will just delegate
        to the other toString(int,int).  This is also parallel with
        the TreeNodeStream.toString(Object,Object).
        """

        raise NotImplementedError


############################################################################
#
# character streams for use in lexers
#   CharStream
#   \- ANTLRStringStream
#
############################################################################


class ANTLRStringStream(CharStream):
    """
    @brief CharStream that pulls data from a unicode string.

    A pretty quick CharStream that pulls all data from an array
    directly.  Every method call counts in the lexer.

    """


    def __init__(self, data):
        """
        @param data This should be a unicode string holding the data you want
           to parse. If you pass in a byte string, the Lexer will choke on
           non-ascii data.

        """

        CharStream.__init__(self)

        # The data being scanned
        self.strdata = unicode(data)
        self.data = [ord(c) for c in self.strdata]

        # How many characters are actually in the buffer; measured on the
        # converted buffer so that size() always agrees with self.data
        self.n = len(self.data)

        # 0..n-1 index into string of next char
        self.p = 0

        # line number 1..n within the input
        self.line = 1

        # The index of the character relative to the beginning of the
        # line 0..n-1
        self.charPositionInLine = 0

        # A list of CharStreamState objects that tracks the stream state
        # values line, charPositionInLine, and p that can change as you
        # move through the input stream.  Indexed from 0..markDepth-1.
        self._markers = [ ]
        self.lastMarker = None
        self.markDepth = 0

        # What is name or source of this char stream?
        self.name = None


    def reset(self):
        """
        Reset the stream so that it's in the same state it was
        when the object was created *except* the data array is not
        touched.
        """

        self.p = 0
        self.line = 1
        self.charPositionInLine = 0
        self._markers = [ ]
        # The marker bookkeeping must be cleared together with the marker
        # list; otherwise a later mark() would hand out a depth that does
        # not correspond to any stored state.
        self.lastMarker = None
        self.markDepth = 0


    def consume(self):
        """
        Advance the cursor by one character, keeping line and
        charPositionInLine up to date.  Does nothing at EOF.
        """

        try:
            if self.data[self.p] == 10: # \n
                self.line += 1
                self.charPositionInLine = 0
            else:
                self.charPositionInLine += 1

            self.p += 1

        except IndexError:
            # happens when we reached EOF and self.data[self.p] fails
            # just do nothing
            pass



    def LA(self, i):
        """
        Return the code point i characters ahead (i=1 is the next char,
        i=-1 the previous one) or EOF if that position lies outside of
        the input.  LA(0) is undefined and yields 0.
        """

        if i == 0:
            return 0 # undefined

        if i < 0:
            i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]

        pos = self.p + i - 1
        if pos < 0:
            # looking back past the start of the input; a negative list
            # index would silently wrap around to the end of the buffer
            return EOF

        try:
            return self.data[pos]
        except IndexError:
            return EOF



    def LT(self, i):
        """
        Like LA(i), but return the character itself (a one element unicode
        string) instead of its code point.  Yields EOF outside of the input
        and 0 for the undefined LT(0).
        """

        if i == 0:
            return 0 # undefined

        if i < 0:
            i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]

        pos = self.p + i - 1
        if pos < 0:
            # same guard as in LA(): never wrap around to the buffer's end
            return EOF

        try:
            return self.strdata[pos]
        except IndexError:
            return EOF


    def index(self):
        """
        Return the current input symbol index 0..n where n indicates the
        last symbol has been read.  The index is the index of char to
        be returned from LA(1).
        """

        return self.p


    def size(self):
        """Return the number of characters in the buffer."""

        return self.n


    def mark(self):
        """
        Record the current position, line and column and return the marker
        (the 1-based mark depth) that identifies the saved state.
        """

        state = (self.p, self.line, self.charPositionInLine)
        try:
            # reuse a slot left over from an earlier, already released mark
            self._markers[self.markDepth] = state
        except IndexError:
            self._markers.append(state)
        self.markDepth += 1

        self.lastMarker = self.markDepth

        return self.lastMarker


    def rewind(self, marker=None):
        if marker is None:
            marker = self.lastMarker

        p, line, charPositionInLine = self._markers[marker-1]

        self.seek(p)
463ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.line = line 464ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.charPositionInLine = charPositionInLine 465ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.release(marker) 466ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 467ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 468ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def release(self, marker=None): 469ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if marker is None: 470ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov marker = self.lastMarker 471ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 472ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.markDepth = marker-1 473ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 474ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 475ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def seek(self, index): 476ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 477ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov consume() ahead until p==index; can't just set p=index as we must 478ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov update line and charPositionInLine. 479ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 480ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 481ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if index <= self.p: 482ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.p = index # just jump; don't update stream state (line, ...) 
483ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return 484ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 485ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # seek forward, consume until p hits index 486ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov while self.p < index: 487ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.consume() 488ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 489ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 490ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def substring(self, start, stop): 491ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return self.strdata[start:stop+1] 492ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 493ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 494ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def getLine(self): 495ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """Using setter/getter methods is deprecated. Use o.line instead.""" 496ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return self.line 497ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 498ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 499ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def getCharPositionInLine(self): 500ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 501ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov Using setter/getter methods is deprecated. Use o.charPositionInLine 502ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov instead. 503ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 504ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return self.charPositionInLine 505ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 506ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 507ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def setLine(self, line): 508ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """Using setter/getter methods is deprecated. 
Use o.line instead.""" 509ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.line = line 510ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 511ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 512ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def setCharPositionInLine(self, pos): 513ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 514ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov Using setter/getter methods is deprecated. Use o.charPositionInLine 515ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov instead. 516ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 517ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.charPositionInLine = pos 518ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 519ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 520ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def getSourceName(self): 521ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return self.name 522ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 523ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 524ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass ANTLRFileStream(ANTLRStringStream): 525ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 526ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov @brief CharStream that opens a file to read the data. 527ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 528ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov This is a char buffer stream that is loaded from a file 529ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov all at once when you construct the object. 530ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 531ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 532ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def __init__(self, fileName, encoding=None): 533ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 534ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov @param fileName The path to the file to be opened. 
The file will be 535ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov opened with mode 'rb'. 536ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 537ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov @param encoding If you set the optional encoding argument, then the 538ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov data will be decoded on the fly. 539ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 540ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 541ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 542ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.fileName = fileName 543ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 544ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov fp = codecs.open(fileName, 'rb', encoding) 545ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov try: 546ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov data = fp.read() 547ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov finally: 548ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov fp.close() 549ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 550ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov ANTLRStringStream.__init__(self, data) 551ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 552ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 553ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def getSourceName(self): 554ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """Deprecated, access o.fileName directly.""" 555ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 556ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return self.fileName 557ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 558ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 559ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass ANTLRInputStream(ANTLRStringStream): 560ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 561ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov @brief CharStream that reads data from a file-like object. 
562ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 563ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov This is a char buffer stream that is loaded from a file like object 564ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov all at once when you construct the object. 565ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 566ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov All input is consumed from the file, but it is not closed. 567ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 568ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 569ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def __init__(self, file, encoding=None): 570ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 571ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov @param file A file-like object holding your input. Only the read() 572ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov method must be implemented. 573ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 574ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov @param encoding If you set the optional encoding argument, then the 575ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov data will be decoded on the fly. 
576ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 577ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 578ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 579ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if encoding is not None: 580ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # wrap input in a decoding reader 581ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov reader = codecs.lookup(encoding)[2] 582ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov file = reader(file) 583ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 584ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov data = file.read() 585ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 586ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov ANTLRStringStream.__init__(self, data) 587ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 588ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 589ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# I guess the ANTLR prefix exists only to avoid a name clash with some Java 590ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# mumbojumbo. A plain "StringStream" looks better to me, which should be 591ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# the preferred name in Python. 
592ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovStringStream = ANTLRStringStream 593ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFileStream = ANTLRFileStream 594ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovInputStream = ANTLRInputStream 595ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 596ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 597ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov############################################################################ 598ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# 599ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# Token streams 600ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# TokenStream 601ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# +- CommonTokenStream 602ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# \- TokenRewriteStream 603ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# 604ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov############################################################################ 605ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 606ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 607ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CommonTokenStream(TokenStream): 608ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 609ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov @brief The most common stream of tokens 610ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 611ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov The most common stream of tokens is one where every token is buffered up 612ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov and tokens are prefiltered for a certain channel (the parser will only 613ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov see these tokens and cannot change the filter channel number during the 614ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov parse). 
615ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 616ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 617ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): 618ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 619ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov @param tokenSource A TokenSource instance (usually a Lexer) to pull 620ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov the tokens from. 621ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 622ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov @param channel Skip tokens on any channel but this one; this is how we 623ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov skip whitespace... 624ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 625ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 626ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 627ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov TokenStream.__init__(self) 628ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 629ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.tokenSource = tokenSource 630ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 631ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # Record every single token pulled from the source so we can reproduce 632ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # chunks of it later. 
633ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.tokens = [] 634ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 635ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # Map<tokentype, channel> to override some Tokens' channel numbers 636ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.channelOverrideMap = {} 637ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 638ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # Set<tokentype>; discard any tokens with this type 639ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.discardSet = set() 640ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 641ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # Skip tokens on any channel but this one; this is how we skip 642ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # whitespace... 643ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.channel = channel 644ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 645ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # By default, track all incoming tokens 646ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.discardOffChannelTokens = False 647ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 648ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # The index into the tokens list of the current token (next token 649ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # to consume). p==-1 indicates that the tokens list is empty 650ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.p = -1 651ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 652ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # Remember last marked position 653ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.lastMarker = None 654ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 655ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # how deep have we gone? 
656ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self._range = -1 657ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 658ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 659ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def makeEOFToken(self): 660ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return self.tokenSource.makeEOFToken() 661ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 662ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 663ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def setTokenSource(self, tokenSource): 664ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """Reset this token stream by setting its token source.""" 665ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 666ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.tokenSource = tokenSource 667ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.tokens = [] 668ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.p = -1 669ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.channel = DEFAULT_CHANNEL 670ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 671ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 672ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def reset(self): 673ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.p = 0 674ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.lastMarker = None 675ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 676ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 677ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def fillBuffer(self): 678ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 679ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov Load all tokens from the token source and put in tokens. 680ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov This is done upon first LT request because you might want to 681ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov set some token type / channel overrides before filling buffer. 
682ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 683ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 684ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 685ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov index = 0 686ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov t = self.tokenSource.nextToken() 687ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov while t is not None and t.type != EOF: 688ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov discard = False 689ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 690ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if self.discardSet is not None and t.type in self.discardSet: 691ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov discard = True 692ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 693ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov elif self.discardOffChannelTokens and t.channel != self.channel: 694ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov discard = True 695ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 696ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # is there a channel override for token type? 
697ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov try: 698ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov overrideChannel = self.channelOverrideMap[t.type] 699ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 700ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov except KeyError: 701ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # no override for this type 702ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov pass 703ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 704ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov else: 705ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if overrideChannel == self.channel: 706ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov t.channel = overrideChannel 707ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov else: 708ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov discard = True 709ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 710ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if not discard: 711ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov t.index = index 712ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.tokens.append(t) 713ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov index += 1 714ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 715ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov t = self.tokenSource.nextToken() 716ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 717ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # leave p pointing at first token on channel 718ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.p = 0 719ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.p = self.skipOffTokenChannels(self.p) 720ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 721ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 722ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def consume(self): 723ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 724ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov Move the input pointer to the next incoming token. 
The stream 725ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov must become active with LT(1) available. consume() simply 726ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov moves the input pointer so that LT(1) points at the next 727ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov input symbol. Consume at least one token. 728ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 729ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov Walk past any token not on the channel the parser is listening to. 730ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 731ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 732ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if self.p < len(self.tokens): 733ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.p += 1 734ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 735ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.p = self.skipOffTokenChannels(self.p) # leave p on valid token 736ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 737ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 738ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def skipOffTokenChannels(self, i): 739ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 740ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov Given a starting index, return the index of the first on-channel 741ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov token. 
742ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 743ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 744ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov try: 745ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov while self.tokens[i].channel != self.channel: 746ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov i += 1 747ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov except IndexError: 748ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov # hit the end of token stream 749ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov pass 750ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 751ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return i 752ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 753ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 754ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def skipOffTokenChannelsReverse(self, i): 755ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov while i >= 0 and self.tokens[i].channel != self.channel: 756ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov i -= 1 757ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 758ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov return i 759ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 760ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 761ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def setTokenTypeChannel(self, ttype, channel): 762ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 763ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov A simple filter mechanism whereby you can tell this token stream 764ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov to force all tokens of type ttype to be on channel. For example, 765ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov when interpreting, we cannot exec actions so we need to tell 766ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov the stream to force all WS and NEWLINE to be a different, ignored 767ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov channel. 
768ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 769ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 770ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.channelOverrideMap[ttype] = channel 771ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 772ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 773ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def discardTokenType(self, ttype): 774ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.discardSet.add(ttype) 775ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 776ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 777ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov def getTokens(self, start=None, stop=None, types=None): 778ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 779ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov Given a start and stop index, return a list of all tokens in 780ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov the token type set. Return None if no tokens were found. This 781ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov method looks at both on and off channel tokens. 
782ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov """ 783ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 784ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if self.p == -1: 785ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov self.fillBuffer() 786ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 787ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov if stop is None or stop >= len(self.tokens): 788 stop = len(self.tokens) - 1 789 790 if start is None or stop < 0: 791 start = 0 792 793 if start > stop: 794 return None 795 796 if isinstance(types, (int, long)): 797 # called with a single type, wrap into set 798 types = set([types]) 799 800 filteredTokens = [ 801 token for token in self.tokens[start:stop] 802 if types is None or token.type in types 803 ] 804 805 if len(filteredTokens) == 0: 806 return None 807 808 return filteredTokens 809 810 811 def LT(self, k): 812 """ 813 Get the ith token from the current position 1..n where k=1 is the 814 first symbol of lookahead. 815 """ 816 817 if self.p == -1: 818 self.fillBuffer() 819 820 if k == 0: 821 return None 822 823 if k < 0: 824 return self.LB(-k) 825 826 i = self.p 827 n = 1 828 # find k good tokens 829 while n < k: 830 # skip off-channel tokens 831 i = self.skipOffTokenChannels(i+1) # leave p on valid token 832 n += 1 833 834 if i > self._range: 835 self._range = i 836 837 try: 838 return self.tokens[i] 839 except IndexError: 840 return self.makeEOFToken() 841 842 843 def LB(self, k): 844 """Look backwards k tokens on-channel tokens""" 845 846 if self.p == -1: 847 self.fillBuffer() 848 849 if k == 0: 850 return None 851 852 if self.p - k < 0: 853 return None 854 855 i = self.p 856 n = 1 857 # find k good tokens looking backwards 858 while n <= k: 859 # skip off-channel tokens 860 i = self.skipOffTokenChannelsReverse(i-1) # leave p on valid token 861 n += 1 862 863 if i < 0: 864 return None 865 866 return self.tokens[i] 867 868 869 def get(self, i): 870 """ 871 Return absolute token i; ignore which 
channel the tokens are on; 872 that is, count all tokens not just on-channel tokens. 873 """ 874 875 return self.tokens[i] 876 877 878 def slice(self, start, stop): 879 if self.p == -1: 880 self.fillBuffer() 881 882 if start < 0 or stop < 0: 883 return None 884 885 return self.tokens[start:stop+1] 886 887 888 def LA(self, i): 889 return self.LT(i).type 890 891 892 def mark(self): 893 self.lastMarker = self.index() 894 return self.lastMarker 895 896 897 def release(self, marker=None): 898 # no resources to release 899 pass 900 901 902 def size(self): 903 return len(self.tokens) 904 905 906 def range(self): 907 return self._range 908 909 910 def index(self): 911 return self.p 912 913 914 def rewind(self, marker=None): 915 if marker is None: 916 marker = self.lastMarker 917 918 self.seek(marker) 919 920 921 def seek(self, index): 922 self.p = index 923 924 925 def getTokenSource(self): 926 return self.tokenSource 927 928 929 def getSourceName(self): 930 return self.tokenSource.getSourceName() 931 932 933 def toString(self, start=None, stop=None): 934 if self.p == -1: 935 self.fillBuffer() 936 937 if start is None: 938 start = 0 939 elif not isinstance(start, int): 940 start = start.index 941 942 if stop is None: 943 stop = len(self.tokens) - 1 944 elif not isinstance(stop, int): 945 stop = stop.index 946 947 if stop >= len(self.tokens): 948 stop = len(self.tokens) - 1 949 950 return ''.join([t.text for t in self.tokens[start:stop+1]]) 951 952 953class RewriteOperation(object): 954 """@brief Internal helper class.""" 955 956 def __init__(self, stream, index, text): 957 self.stream = stream 958 959 # What index into rewrites List are we? 960 self.instructionIndex = None 961 962 # Token buffer index. 963 self.index = index 964 self.text = text 965 966 def execute(self, buf): 967 """Execute the rewrite operation by possibly adding to the buffer. 968 Return the index of the next token to operate on. 
969 """ 970 971 return self.index 972 973 def toString(self): 974 opName = self.__class__.__name__ 975 return '<%s@%d:"%s">' % ( 976 opName, self.index, self.text) 977 978 __str__ = toString 979 __repr__ = toString 980 981 982class InsertBeforeOp(RewriteOperation): 983 """@brief Internal helper class.""" 984 985 def execute(self, buf): 986 buf.write(self.text) 987 if self.stream.tokens[self.index].type != EOF: 988 buf.write(self.stream.tokens[self.index].text) 989 return self.index + 1 990 991 992class ReplaceOp(RewriteOperation): 993 """ 994 @brief Internal helper class. 995 996 I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp 997 instructions. 998 """ 999 1000 def __init__(self, stream, first, last, text): 1001 RewriteOperation.__init__(self, stream, first, text) 1002 self.lastIndex = last 1003 1004 1005 def execute(self, buf): 1006 if self.text is not None: 1007 buf.write(self.text) 1008 1009 return self.lastIndex + 1 1010 1011 1012 def toString(self): 1013 if self.text is None: 1014 return '<DeleteOp@%d..%d>' % (self.index, self.lastIndex) 1015 1016 return '<ReplaceOp@%d..%d:"%s">' % ( 1017 self.index, self.lastIndex, self.text) 1018 1019 __str__ = toString 1020 __repr__ = toString 1021 1022 1023class TokenRewriteStream(CommonTokenStream): 1024 """@brief CommonTokenStream that can be modified. 1025 1026 Useful for dumping out the input stream after doing some 1027 augmentation or other manipulations. 1028 1029 You can insert stuff, replace, and delete chunks. Note that the 1030 operations are done lazily--only if you convert the buffer to a 1031 String. This is very efficient because you are not moving data around 1032 all the time. As the buffer of tokens is converted to strings, the 1033 toString() method(s) check to see if there is an operation at the 1034 current index. If so, the operation is done and then normal String 1035 rendering continues on the buffer. 
This is like having multiple Turing 1036 machine instruction streams (programs) operating on a single input tape. :) 1037 1038 Since the operations are done lazily at toString-time, operations do not 1039 screw up the token index values. That is, an insert operation at token 1040 index i does not change the index values for tokens i+1..n-1. 1041 1042 Because operations never actually alter the buffer, you may always get 1043 the original token stream back without undoing anything. Since 1044 the instructions are queued up, you can easily simulate transactions and 1045 roll back any changes if there is an error just by removing instructions. 1046 For example, 1047 1048 CharStream input = new ANTLRFileStream("input"); 1049 TLexer lex = new TLexer(input); 1050 TokenRewriteStream tokens = new TokenRewriteStream(lex); 1051 T parser = new T(tokens); 1052 parser.startRule(); 1053 1054 Then in the rules, you can execute 1055 Token t,u; 1056 ... 1057 input.insertAfter(t, "text to put after t");} 1058 input.insertAfter(u, "text after u");} 1059 System.out.println(tokens.toString()); 1060 1061 Actually, you have to cast the 'input' to a TokenRewriteStream. :( 1062 1063 You can also have multiple "instruction streams" and get multiple 1064 rewrites from a single pass over the input. Just name the instruction 1065 streams and use that name again when printing the buffer. This could be 1066 useful for generating a C file and also its header file--all from the 1067 same buffer: 1068 1069 tokens.insertAfter("pass1", t, "text to put after t");} 1070 tokens.insertAfter("pass2", u, "text after u");} 1071 System.out.println(tokens.toString("pass1")); 1072 System.out.println(tokens.toString("pass2")); 1073 1074 If you don't use named rewrite streams, a "default" stream is used as 1075 the first example shows. 
1076 """ 1077 1078 DEFAULT_PROGRAM_NAME = "default" 1079 MIN_TOKEN_INDEX = 0 1080 1081 def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): 1082 CommonTokenStream.__init__(self, tokenSource, channel) 1083 1084 # You may have multiple, named streams of rewrite operations. 1085 # I'm calling these things "programs." 1086 # Maps String (name) -> rewrite (List) 1087 self.programs = {} 1088 self.programs[self.DEFAULT_PROGRAM_NAME] = [] 1089 1090 # Map String (program name) -> Integer index 1091 self.lastRewriteTokenIndexes = {} 1092 1093 1094 def rollback(self, *args): 1095 """ 1096 Rollback the instruction stream for a program so that 1097 the indicated instruction (via instructionIndex) is no 1098 longer in the stream. UNTESTED! 1099 """ 1100 1101 if len(args) == 2: 1102 programName = args[0] 1103 instructionIndex = args[1] 1104 elif len(args) == 1: 1105 programName = self.DEFAULT_PROGRAM_NAME 1106 instructionIndex = args[0] 1107 else: 1108 raise TypeError("Invalid arguments") 1109 1110 p = self.programs.get(programName, None) 1111 if p is not None: 1112 self.programs[programName] = ( 1113 p[self.MIN_TOKEN_INDEX:instructionIndex]) 1114 1115 1116 def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME): 1117 """Reset the program so that no instructions exist""" 1118 1119 self.rollback(programName, self.MIN_TOKEN_INDEX) 1120 1121 1122 def insertAfter(self, *args): 1123 if len(args) == 2: 1124 programName = self.DEFAULT_PROGRAM_NAME 1125 index = args[0] 1126 text = args[1] 1127 1128 elif len(args) == 3: 1129 programName = args[0] 1130 index = args[1] 1131 text = args[2] 1132 1133 else: 1134 raise TypeError("Invalid arguments") 1135 1136 if isinstance(index, Token): 1137 # index is a Token, grap the stream index from it 1138 index = index.index 1139 1140 # to insert after, just insert before next index (even if past end) 1141 self.insertBefore(programName, index+1, text) 1142 1143 1144 def insertBefore(self, *args): 1145 if len(args) == 2: 1146 programName = 
self.DEFAULT_PROGRAM_NAME 1147 index = args[0] 1148 text = args[1] 1149 1150 elif len(args) == 3: 1151 programName = args[0] 1152 index = args[1] 1153 text = args[2] 1154 1155 else: 1156 raise TypeError("Invalid arguments") 1157 1158 if isinstance(index, Token): 1159 # index is a Token, grap the stream index from it 1160 index = index.index 1161 1162 op = InsertBeforeOp(self, index, text) 1163 rewrites = self.getProgram(programName) 1164 op.instructionIndex = len(rewrites) 1165 rewrites.append(op) 1166 1167 1168 def replace(self, *args): 1169 if len(args) == 2: 1170 programName = self.DEFAULT_PROGRAM_NAME 1171 first = args[0] 1172 last = args[0] 1173 text = args[1] 1174 1175 elif len(args) == 3: 1176 programName = self.DEFAULT_PROGRAM_NAME 1177 first = args[0] 1178 last = args[1] 1179 text = args[2] 1180 1181 elif len(args) == 4: 1182 programName = args[0] 1183 first = args[1] 1184 last = args[2] 1185 text = args[3] 1186 1187 else: 1188 raise TypeError("Invalid arguments") 1189 1190 if isinstance(first, Token): 1191 # first is a Token, grap the stream index from it 1192 first = first.index 1193 1194 if isinstance(last, Token): 1195 # last is a Token, grap the stream index from it 1196 last = last.index 1197 1198 if first > last or first < 0 or last < 0 or last >= len(self.tokens): 1199 raise ValueError( 1200 "replace: range invalid: %d..%d (size=%d)" 1201 % (first, last, len(self.tokens))) 1202 1203 op = ReplaceOp(self, first, last, text) 1204 rewrites = self.getProgram(programName) 1205 op.instructionIndex = len(rewrites) 1206 rewrites.append(op) 1207 1208 1209 def delete(self, *args): 1210 self.replace(*(list(args) + [None])) 1211 1212 1213 def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME): 1214 return self.lastRewriteTokenIndexes.get(programName, -1) 1215 1216 1217 def setLastRewriteTokenIndex(self, programName, i): 1218 self.lastRewriteTokenIndexes[programName] = i 1219 1220 1221 def getProgram(self, name): 1222 p = self.programs.get(name, 
None) 1223 if p is None: 1224 p = self.initializeProgram(name) 1225 1226 return p 1227 1228 1229 def initializeProgram(self, name): 1230 p = [] 1231 self.programs[name] = p 1232 return p 1233 1234 1235 def toOriginalString(self, start=None, end=None): 1236 if self.p == -1: 1237 self.fillBuffer() 1238 1239 if start is None: 1240 start = self.MIN_TOKEN_INDEX 1241 if end is None: 1242 end = self.size() - 1 1243 1244 buf = StringIO() 1245 i = start 1246 while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): 1247 if self.get(i).type != EOF: 1248 buf.write(self.get(i).text) 1249 i += 1 1250 1251 return buf.getvalue() 1252 1253 1254 def toString(self, *args): 1255 if self.p == -1: 1256 self.fillBuffer() 1257 1258 if len(args) == 0: 1259 programName = self.DEFAULT_PROGRAM_NAME 1260 start = self.MIN_TOKEN_INDEX 1261 end = self.size() - 1 1262 1263 elif len(args) == 1: 1264 programName = args[0] 1265 start = self.MIN_TOKEN_INDEX 1266 end = self.size() - 1 1267 1268 elif len(args) == 2: 1269 programName = self.DEFAULT_PROGRAM_NAME 1270 start = args[0] 1271 end = args[1] 1272 1273 if start is None: 1274 start = self.MIN_TOKEN_INDEX 1275 elif not isinstance(start, int): 1276 start = start.index 1277 1278 if end is None: 1279 end = len(self.tokens) - 1 1280 elif not isinstance(end, int): 1281 end = end.index 1282 1283 # ensure start/end are in range 1284 if end >= len(self.tokens): 1285 end = len(self.tokens) - 1 1286 1287 if start < 0: 1288 start = 0 1289 1290 rewrites = self.programs.get(programName) 1291 if rewrites is None or len(rewrites) == 0: 1292 # no instructions to execute 1293 return self.toOriginalString(start, end) 1294 1295 buf = StringIO() 1296 1297 # First, optimize instruction stream 1298 indexToOp = self.reduceToSingleOperationPerIndex(rewrites) 1299 1300 # Walk buffer, executing instructions and emitting tokens 1301 i = start 1302 while i <= end and i < len(self.tokens): 1303 op = indexToOp.get(i) 1304 # remove so any left have index size-1 
1305 try: 1306 del indexToOp[i] 1307 except KeyError: 1308 pass 1309 1310 t = self.tokens[i] 1311 if op is None: 1312 # no operation at that index, just dump token 1313 if t.type != EOF: 1314 buf.write(t.text) 1315 i += 1 # move to next token 1316 1317 else: 1318 i = op.execute(buf) # execute operation and skip 1319 1320 # include stuff after end if it's last index in buffer 1321 # So, if they did an insertAfter(lastValidIndex, "foo"), include 1322 # foo if end==lastValidIndex. 1323 if end == len(self.tokens) - 1: 1324 # Scan any remaining operations after last token 1325 # should be included (they will be inserts). 1326 for i in sorted(indexToOp.keys()): 1327 op = indexToOp[i] 1328 if op.index >= len(self.tokens)-1: 1329 buf.write(op.text) 1330 1331 return buf.getvalue() 1332 1333 __str__ = toString 1334 1335 1336 def reduceToSingleOperationPerIndex(self, rewrites): 1337 """ 1338 We need to combine operations and report invalid operations (like 1339 overlapping replaces that are not completed nested). Inserts to 1340 same index need to be combined etc... Here are the cases: 1341 1342 I.i.u I.j.v leave alone, nonoverlapping 1343 I.i.u I.i.v combine: Iivu 1344 1345 R.i-j.u R.x-y.v | i-j in x-y delete first R 1346 R.i-j.u R.i-j.v delete first R 1347 R.i-j.u R.x-y.v | x-y in i-j ERROR 1348 R.i-j.u R.x-y.v | boundaries overlap ERROR 1349 1350 Delete special case of replace (text==null): 1351 D.i-j.u D.x-y.v | boundaries overlapcombine to 1352 max(min)..max(right) 1353 1354 I.i.u R.x-y.v | i in (x+1)-ydelete I (since 1355 insert before we're not deleting 1356 i) 1357 I.i.u R.x-y.v | i not in (x+1)-yleave alone, 1358 nonoverlapping 1359 1360 R.x-y.v I.i.u | i in x-y ERROR 1361 R.x-y.v I.x.u R.x-y.uv (combine, delete I) 1362 R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping 1363 1364 I.i.u = insert u before op @ index i 1365 R.x-y.u = replace x-y indexed tokens with u 1366 1367 First we need to examine replaces. For any replace op: 1368 1369 1. 
wipe out any insertions before op within that range. 1370 2. Drop any replace op before that is contained completely within 1371 that range. 1372 3. Throw exception upon boundary overlap with any previous replace. 1373 1374 Then we can deal with inserts: 1375 1376 1. for any inserts to same index, combine even if not adjacent. 1377 2. for any prior replace with same left boundary, combine this 1378 insert with replace and delete this replace. 1379 3. throw exception if index in same range as previous replace 1380 1381 Don't actually delete; make op null in list. Easier to walk list. 1382 Later we can throw as we add to index -> op map. 1383 1384 Note that I.2 R.2-2 will wipe out I.2 even though, technically, the 1385 inserted stuff would be before the replace range. But, if you 1386 add tokens in front of a method body '{' and then delete the method 1387 body, I think the stuff before the '{' you added should disappear too. 1388 1389 Return a map from token index to operation. 1390 """ 1391 1392 # WALK REPLACES 1393 for i, rop in enumerate(rewrites): 1394 if rop is None: 1395 continue 1396 1397 if not isinstance(rop, ReplaceOp): 1398 continue 1399 1400 # Wipe prior inserts within range 1401 for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i): 1402 if iop.index == rop.index: 1403 # E.g., insert before 2, delete 2..2; update replace 1404 # text to include insert before, kill insert 1405 rewrites[iop.instructionIndex] = None 1406 rop.text = self.catOpText(iop.text, rop.text) 1407 1408 elif iop.index > rop.index and iop.index <= rop.lastIndex: 1409 # delete insert as it's a no-op. 1410 rewrites[j] = None 1411 1412 # Drop any prior replaces contained within 1413 for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i): 1414 if (prevRop.index >= rop.index 1415 and prevRop.lastIndex <= rop.lastIndex): 1416 # delete replace as it's a no-op. 
1417 rewrites[j] = None 1418 continue 1419 1420 # throw exception unless disjoint or identical 1421 disjoint = (prevRop.lastIndex < rop.index 1422 or prevRop.index > rop.lastIndex) 1423 same = (prevRop.index == rop.index 1424 and prevRop.lastIndex == rop.lastIndex) 1425 1426 # Delete special case of replace (text==null): 1427 # D.i-j.u D.x-y.v| boundaries overlapcombine to 1428 # max(min)..max(right) 1429 if prevRop.text is None and rop.text is None and not disjoint: 1430 # kill first delete 1431 rewrites[prevRop.instructionIndex] = None 1432 1433 rop.index = min(prevRop.index, rop.index) 1434 rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex) 1435 1436 elif not disjoint and not same: 1437 raise ValueError( 1438 "replace op boundaries of %s overlap with previous %s" 1439 % (rop, prevRop)) 1440 1441 # WALK INSERTS 1442 for i, iop in enumerate(rewrites): 1443 if iop is None: 1444 continue 1445 1446 if not isinstance(iop, InsertBeforeOp): 1447 continue 1448 1449 # combine current insert with prior if any at same index 1450 for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i): 1451 if prevIop.index == iop.index: # combine objects 1452 # convert to strings...we're in process of toString'ing 1453 # whole token buffer so no lazy eval issue with any 1454 # templates 1455 iop.text = self.catOpText(iop.text, prevIop.text) 1456 # delete redundant prior insert 1457 rewrites[j] = None 1458 1459 # look for replaces where iop.index is in range; error 1460 for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i): 1461 if iop.index == rop.index: 1462 rop.text = self.catOpText(iop.text, rop.text) 1463 # delete current insert 1464 rewrites[i] = None 1465 continue 1466 1467 if iop.index >= rop.index and iop.index <= rop.lastIndex: 1468 raise ValueError( 1469 "insert op %s within boundaries of previous %s" 1470 % (iop, rop)) 1471 1472 m = {} 1473 for i, op in enumerate(rewrites): 1474 if op is None: 1475 # ignore deleted ops 1476 continue 1477 1478 assert op.index not in 
m, "should only be one op per index" 1479 m[op.index] = op 1480 1481 return m 1482 1483 1484 def catOpText(self, a, b): 1485 x = "" 1486 y = "" 1487 if a is not None: 1488 x = a 1489 if b is not None: 1490 y = b 1491 return x + y 1492 1493 1494 def getKindOfOps(self, rewrites, kind, before=None): 1495 """Get all operations before an index of a particular kind.""" 1496 1497 if before is None: 1498 before = len(rewrites) 1499 elif before > len(rewrites): 1500 before = len(rewrites) 1501 1502 for i, op in enumerate(rewrites[:before]): 1503 if op is None: 1504 # ignore deleted 1505 continue 1506 if op.__class__ == kind: 1507 yield i, op 1508 1509 1510 def toDebugString(self, start=None, end=None): 1511 if start is None: 1512 start = self.MIN_TOKEN_INDEX 1513 if end is None: 1514 end = self.size() - 1 1515 1516 buf = StringIO() 1517 i = start 1518 while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): 1519 buf.write(self.get(i)) 1520 i += 1 1521 1522 return buf.getvalue() 1523