1ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov"""ANTLR3 runtime package"""
2ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
3ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# begin[licence]
4ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
5ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# [The "BSD licence"]
6ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# Copyright (c) 2005-2008 Terence Parr
7ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# All rights reserved.
8ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
9ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# Redistribution and use in source and binary forms, with or without
10ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# modification, are permitted provided that the following conditions
11ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# are met:
12ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# 1. Redistributions of source code must retain the above copyright
13ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#    notice, this list of conditions and the following disclaimer.
14ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# 2. Redistributions in binary form must reproduce the above copyright
15ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#    notice, this list of conditions and the following disclaimer in the
16ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#    documentation and/or other materials provided with the distribution.
17ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# 3. The name of the author may not be used to endorse or promote products
18ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#    derived from this software without specific prior written permission.
19ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
20ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
31ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# end[licence]
32ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
33ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovimport codecs
34ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovfrom StringIO import StringIO
35ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
36ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovfrom antlr3.constants import DEFAULT_CHANNEL, EOF
37ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovfrom antlr3.tokens import Token, CommonToken
38ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
39ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
40ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov############################################################################
41ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
42ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# basic interfaces
43ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#   IntStream
44ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#    +- CharStream
45ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#    \- TokenStream
46ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
47ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# subclasses must implemented all methods
48ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
49ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov############################################################################
50ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
51ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass IntStream(object):
52ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
53ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    @brief Base interface for streams of integer values.
54ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
55ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    A simple stream of integers used when all I care about is the char
56ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    or token type sequence (such as interpretation).
57ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
58ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
59ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def consume(self):
60ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
61ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
62ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
63ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def LA(self, i):
64ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """Get int at current input pointer + i ahead where i=1 is next int.
65ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
66ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Negative indexes are allowed.  LA(-1) is previous token (token
67ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	just matched).  LA(-i) where i is before first token should
68ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	yield -1, invalid char / EOF.
69ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	"""
70ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
71ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
72ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
73ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
74ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def mark(self):
75ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
76ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Tell the stream to start buffering if it hasn't already.  Return
77ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        current input position, index(), or some other marker so that
78ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        when passed to rewind() you get back to the same spot.
79ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        rewind(mark()) should not affect the input cursor.  The Lexer
80ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        track line/col info as well as input index so its markers are
81ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        not pure input indexes.  Same for tree node streams.
82ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
83ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
84ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
85ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
86ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
87ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def index(self):
88ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
89ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Return the current input symbol index 0..n where n indicates the
90ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        last symbol has been read.  The index is the symbol about to be
91ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        read not the most recently read symbol.
92ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
93ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
94ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
95ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
96ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
97ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def rewind(self, marker=None):
98ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
99ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Reset the stream so that next call to index would return marker.
100ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        The marker will usually be index() but it doesn't have to be.  It's
101ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        just a marker to indicate what state the stream was in.  This is
102ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        essentially calling release() and seek().  If there are markers
103ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        created after this marker argument, this routine must unroll them
104ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        like a stack.  Assume the state the stream was in when this marker
105ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        was created.
106ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
107ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        If marker is None:
108ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Rewind to the input position of the last marker.
109ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Used currently only after a cyclic DFA and just
110ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        before starting a sem/syn predicate to get the
111ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        input position back to the start of the decision.
112ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Do not "pop" the marker off the state.  mark(i)
113ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        and rewind(i) should balance still. It is
114ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        like invoking rewind(last marker) but it should not "pop"
115ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        the marker off.  It's like seek(last marker's input position).
116ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	"""
117ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
118ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
119ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
120ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
121ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def release(self, marker=None):
122ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
123ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        You may want to commit to a backtrack but don't want to force the
124ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        stream to keep bookkeeping objects around for a marker that is
125ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        no longer necessary.  This will have the same behavior as
126ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        rewind() except it releases resources without the backward seek.
127ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        This must throw away resources for all markers back to the marker
128ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        argument.  So if you're nested 5 levels of mark(), and then release(2)
129ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        you have to release resources for depths 2..5.
130ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	"""
131ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
132ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
133ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
134ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
135ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def seek(self, index):
136ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
137ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Set the input cursor to the position indicated by index.  This is
138ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        normally used to seek ahead in the input stream.  No buffering is
139ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        required to do this unless you know your stream will use seek to
140ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        move backwards such as when backtracking.
141ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
142ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        This is different from rewind in its multi-directional
143ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        requirement and in that its argument is strictly an input cursor
144ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        (index).
145ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
146ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        For char streams, seeking forward must update the stream state such
147ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        as line number.  For seeking backwards, you will be presumably
148ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        backtracking using the mark/rewind mechanism that restores state and
149ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        so this method does not need to update state when seeking backwards.
150ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
151ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Currently, this method is only used for efficient backtracking using
152ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        memoization, but in the future it may be used for incremental parsing.
153ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
154ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        The index is 0..n-1.  A seek to position i means that LA(1) will
155ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return the ith symbol.  So, seeking to 0 means LA(1) will return the
156ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        first element in the stream.
157ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
158ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
159ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
160ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
161ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
162ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def size(self):
163ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
164ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Only makes sense for streams that buffer everything up probably, but
165ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        might be useful to display the entire stream or for testing.  This
166ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        value includes a single EOF.
167ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	"""
168ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
169ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
170ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
171ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
172ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def getSourceName(self):
173ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
174ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Where are you getting symbols from?  Normally, implementations will
175ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        pass the buck all the way to the lexer who can ask its input stream
176ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        for the file name or whatever.
177ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
178ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
179ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
180ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
181ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
182ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CharStream(IntStream):
183ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
184ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    @brief A source of characters for an ANTLR lexer.
185ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
186ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    This is an abstract class that must be implemented by a subclass.
187ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
188ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
189ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
190ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    # pylint does not realize that this is an interface, too
191ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    #pylint: disable-msg=W0223
192ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
193ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    EOF = -1
194ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
195ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
196ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def substring(self, start, stop):
197ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
198ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        For infinite streams, you don't need this; primarily I'm providing
199ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        a useful interface for action code.  Just make sure actions don't
200ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        use this on streams that don't support it.
201ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
202ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
203ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
204ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
205ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
206ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def LT(self, i):
207ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
208ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Get the ith character of lookahead.  This is the same usually as
209ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        LA(i).  This will be used for labels in the generated
210ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        lexer code.  I'd prefer to return a char here type-wise, but it's
211ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        probably better to be 32-bit clean and be consistent with LA.
212ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
213ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
214ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
215ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
216ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
217ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def getLine(self):
218ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """ANTLR tracks the line information automatically"""
219ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
220ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
221ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
222ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
223ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def setLine(self, line):
224ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
225ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Because this stream can rewind, we need to be able to reset the line
226ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
227ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
228ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
229ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
230ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
231ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def getCharPositionInLine(self):
232ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
233ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        The index of the character relative to the beginning of the line 0..n-1
234ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
235ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
236ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
237ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
238ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
239ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def setCharPositionInLine(self, pos):
240ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
241ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
242ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
243ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass TokenStream(IntStream):
244ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
245ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
246ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    @brief A stream of tokens accessing tokens from a TokenSource
247ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
248ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    This is an abstract class that must be implemented by a subclass.
249ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
250ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
251ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
252ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    # pylint does not realize that this is an interface, too
253ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    #pylint: disable-msg=W0223
254ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
255ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def LT(self, k):
256ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
257ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Get Token at current input pointer + i ahead where i=1 is next Token.
258ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        i<0 indicates tokens in the past.  So -1 is previous token and -2 is
259ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        two tokens ago. LT(0) is undefined.  For i>=n, return Token.EOFToken.
260ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Return null for LT(0) and any index that results in an absolute address
261ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        that is negative.
262ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	"""
263ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
264ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
265ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
266ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
267ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def range(self):
268ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
269ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        How far ahead has the stream been asked to look?  The return
270ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        value is a valid index from 0..n-1.
271ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
272ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
273ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
274ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
275ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
276ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def get(self, i):
277ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
278ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Get a token at an absolute index i; 0..n-1.  This is really only
279ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        needed for profiling and debugging and token stream rewriting.
280ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        If you don't want to buffer up tokens, then this method makes no
281ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        sense for you.  Naturally you can't use the rewrite stream feature.
282ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        I believe DebugTokenStream can easily be altered to not use
283ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        this method, removing the dependency.
284ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
285ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
286ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
287ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
288ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
289ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def getTokenSource(self):
290ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
291ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Where is this stream pulling tokens from?  This is not the name, but
292ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        the object that provides Token objects.
293ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	"""
294ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
295ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
296ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
297ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
298ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def toString(self, start=None, stop=None):
299ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
300ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Return the text of all tokens from start to stop, inclusive.
301ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        If the stream does not buffer all the tokens then it can just
302ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return "" or null;  Users should not access $ruleLabel.text in
303ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        an action of course in that case.
304ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
305ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Because the user is not required to use a token with an index stored
306ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        in it, we must provide a means for two token objects themselves to
307ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        indicate the start/end location.  Most often this will just delegate
308ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        to the other toString(int,int).  This is also parallel with
309ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        the TreeNodeStream.toString(Object,Object).
310ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	"""
311ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
312ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        raise NotImplementedError
313ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
314ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
315ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov############################################################################
316ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
317ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# character streams for use in lexers
318ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#   CharStream
319ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#   \- ANTLRStringStream
320ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
321ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov############################################################################
322ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
323ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
324ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass ANTLRStringStream(CharStream):
325ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
326ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    @brief CharStream that pull data from a unicode string.
327ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
328ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    A pretty quick CharStream that pulls all data from an array
329ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    directly.  Every method call counts in the lexer.
330ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
331ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
332ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
333ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
334ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def __init__(self, data):
335ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
336ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        @param data This should be a unicode string holding the data you want
337ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov           to parse. If you pass in a byte string, the Lexer will choke on
338ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov           non-ascii data.
339ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
340ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
341ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
342ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        CharStream.__init__(self)
343ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
344ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov  	# The data being scanned
345ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.strdata = unicode(data)
346ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.data = [ord(c) for c in self.strdata]
347ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
348ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	# How many characters are actually in the buffer
349ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.n = len(data)
350ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
351ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 	# 0..n-1 index into string of next char
352ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.p = 0
353ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
354ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	# line number 1..n within the input
355ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.line = 1
356ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
357ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov 	# The index of the character relative to the beginning of the
358ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # line 0..n-1
359ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.charPositionInLine = 0
360ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
361ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	# A list of CharStreamState objects that tracks the stream state
362ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # values line, charPositionInLine, and p that can change as you
363ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # move through the input stream.  Indexed from 0..markDepth-1.
364ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self._markers = [ ]
365ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.lastMarker = None
366ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.markDepth = 0
367ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
368ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # What is name or source of this char stream?
369ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.name = None
370ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
371ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
372ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def reset(self):
373ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
374ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Reset the stream so that it's in the same state it was
375ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        when the object was created *except* the data array is not
376ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        touched.
377ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
378ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
379ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.p = 0
380ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.line = 1
381ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.charPositionInLine = 0
382ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self._markers = [ ]
383ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
384ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
385ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def consume(self):
386ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        try:
387ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            if self.data[self.p] == 10: # \n
388ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                self.line += 1
389ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                self.charPositionInLine = 0
390ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            else:
391ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                self.charPositionInLine += 1
392ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
393ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            self.p += 1
394ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
395ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        except IndexError:
396ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            # happend when we reached EOF and self.data[self.p] fails
397ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            # just do nothing
398ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            pass
399ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
400ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
401ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
402ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def LA(self, i):
403ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if i == 0:
404ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            return 0 # undefined
405ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
406ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if i < 0:
407ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
408ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
409ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        try:
410ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            return self.data[self.p+i-1]
411ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        except IndexError:
412ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            return EOF
413ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
414ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
415ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
416ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def LT(self, i):
417ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if i == 0:
418ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            return 0 # undefined
419ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
420ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if i < 0:
421ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
422ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
423ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        try:
424ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            return self.strdata[self.p+i-1]
425ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        except IndexError:
426ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            return EOF
427ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
428ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
429ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def index(self):
430ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
431ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Return the current input symbol index 0..n where n indicates the
432ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        last symbol has been read.  The index is the index of char to
433ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        be returned from LA(1).
434ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
435ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
436ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return self.p
437ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
438ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
439ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def size(self):
440ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return self.n
441ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
442ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
443ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def mark(self):
444ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        state = (self.p, self.line, self.charPositionInLine)
445ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        try:
446ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            self._markers[self.markDepth] = state
447ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        except IndexError:
448ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            self._markers.append(state)
449ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.markDepth += 1
450ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
451ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.lastMarker = self.markDepth
452ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
453ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return self.lastMarker
454ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
455ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
456ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def rewind(self, marker=None):
457ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if marker is None:
458ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            marker = self.lastMarker
459ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
460ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        p, line, charPositionInLine = self._markers[marker-1]
461ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
462ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.seek(p)
463ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.line = line
464ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.charPositionInLine = charPositionInLine
465ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.release(marker)
466ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
467ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
468ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def release(self, marker=None):
469ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if marker is None:
470ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            marker = self.lastMarker
471ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
472ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.markDepth = marker-1
473ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
474ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
475ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def seek(self, index):
476ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
477ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        consume() ahead until p==index; can't just set p=index as we must
478ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        update line and charPositionInLine.
479ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
480ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
481ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if index <= self.p:
482ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            self.p = index # just jump; don't update stream state (line, ...)
483ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            return
484ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
485ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # seek forward, consume until p hits index
486ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        while self.p < index:
487ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            self.consume()
488ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
489ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
490ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def substring(self, start, stop):
491ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return self.strdata[start:stop+1]
492ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
493ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
494ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def getLine(self):
495ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """Using setter/getter methods is deprecated. Use o.line instead."""
496ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return self.line
497ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
498ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
499ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def getCharPositionInLine(self):
500ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
501ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Using setter/getter methods is deprecated. Use o.charPositionInLine
502ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        instead.
503ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
504ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return self.charPositionInLine
505ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
506ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
507ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def setLine(self, line):
508ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """Using setter/getter methods is deprecated. Use o.line instead."""
509ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.line = line
510ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
511ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
512ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def setCharPositionInLine(self, pos):
513ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
514ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Using setter/getter methods is deprecated. Use o.charPositionInLine
515ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        instead.
516ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
517ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.charPositionInLine = pos
518ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
519ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
520ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def getSourceName(self):
521ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return self.name
522ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
523ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
524ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass ANTLRFileStream(ANTLRStringStream):
525ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
526ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    @brief CharStream that opens a file to read the data.
527ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
528ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    This is a char buffer stream that is loaded from a file
529ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    all at once when you construct the object.
530ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
531ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
532ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def __init__(self, fileName, encoding=None):
533ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
534ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        @param fileName The path to the file to be opened. The file will be
535ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov           opened with mode 'rb'.
536ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
537ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        @param encoding If you set the optional encoding argument, then the
538ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov           data will be decoded on the fly.
539ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
540ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
541ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
542ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.fileName = fileName
543ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
544ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        fp = codecs.open(fileName, 'rb', encoding)
545ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        try:
546ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            data = fp.read()
547ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        finally:
548ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            fp.close()
549ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
550ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        ANTLRStringStream.__init__(self, data)
551ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
552ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
553ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def getSourceName(self):
554ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """Deprecated, access o.fileName directly."""
555ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
556ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return self.fileName
557ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
558ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
559ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass ANTLRInputStream(ANTLRStringStream):
560ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
561ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    @brief CharStream that reads data from a file-like object.
562ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
563ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    This is a char buffer stream that is loaded from a file like object
564ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    all at once when you construct the object.
565ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
566ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    All input is consumed from the file, but it is not closed.
567ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
568ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
569ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def __init__(self, file, encoding=None):
570ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
571ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        @param file A file-like object holding your input. Only the read()
572ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov           method must be implemented.
573ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
574ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        @param encoding If you set the optional encoding argument, then the
575ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov           data will be decoded on the fly.
576ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
577ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
578ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
579ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if encoding is not None:
580ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            # wrap input in a decoding reader
581ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            reader = codecs.lookup(encoding)[2]
582ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            file = reader(file)
583ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
584ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        data = file.read()
585ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
586ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        ANTLRStringStream.__init__(self, data)
587ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
588ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
589ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# I guess the ANTLR prefix exists only to avoid a name clash with some Java
590ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# mumbojumbo. A plain "StringStream" looks better to me, which should be
591ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# the preferred name in Python.
592ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovStringStream = ANTLRStringStream
593ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovFileStream = ANTLRFileStream
594ee451cb395940862dad63c85adfe8f2fd55e864cSvet GanovInputStream = ANTLRInputStream
595ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
596ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
597ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov############################################################################
598ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
599ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov# Token streams
600ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#   TokenStream
601ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#   +- CommonTokenStream
602ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#   \- TokenRewriteStream
603ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov#
604ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov############################################################################
605ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
606ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
607ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganovclass CommonTokenStream(TokenStream):
608ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
609ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    @brief The most common stream of tokens
610ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
611ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    The most common stream of tokens is one where every token is buffered up
612ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    and tokens are prefiltered for a certain channel (the parser will only
613ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    see these tokens and cannot change the filter channel number during the
614ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    parse).
615ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    """
616ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
617ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
618ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
619ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        @param tokenSource A TokenSource instance (usually a Lexer) to pull
620ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            the tokens from.
621ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
622ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        @param channel Skip tokens on any channel but this one; this is how we
623ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            skip whitespace...
624ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
625ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
626ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
627ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        TokenStream.__init__(self)
628ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
629ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.tokenSource = tokenSource
630ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
631ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	# Record every single token pulled from the source so we can reproduce
632ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # chunks of it later.
633ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.tokens = []
634ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
635ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	# Map<tokentype, channel> to override some Tokens' channel numbers
636ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.channelOverrideMap = {}
637ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
638ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	# Set<tokentype>; discard any tokens with this type
639ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.discardSet = set()
640ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
641ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	# Skip tokens on any channel but this one; this is how we skip
642ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # whitespace...
643ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.channel = channel
644ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
645ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	# By default, track all incoming tokens
646ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.discardOffChannelTokens = False
647ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
648ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	# The index into the tokens list of the current token (next token
649ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # to consume).  p==-1 indicates that the tokens list is empty
650ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.p = -1
651ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
652ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # Remember last marked position
653ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.lastMarker = None
654ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
655ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # how deep have we gone?
656ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self._range = -1
657ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
658ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
659ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def makeEOFToken(self):
660ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return self.tokenSource.makeEOFToken()
661ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
662ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
663ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def setTokenSource(self, tokenSource):
664ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """Reset this token stream by setting its token source."""
665ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
666ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.tokenSource = tokenSource
667ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.tokens = []
668ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.p = -1
669ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.channel = DEFAULT_CHANNEL
670ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
671ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
672ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def reset(self):
673ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.p = 0
674ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.lastMarker = None
675ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
676ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
677ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def fillBuffer(self):
678ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
679ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Load all tokens from the token source and put in tokens.
680ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	This is done upon first LT request because you might want to
681ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        set some token type / channel overrides before filling buffer.
682ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
683ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
684ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
685ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        index = 0
686ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        t = self.tokenSource.nextToken()
687ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        while t is not None and t.type != EOF:
688ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            discard = False
689ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
690ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            if self.discardSet is not None and t.type in self.discardSet:
691ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                discard = True
692ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
693ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            elif self.discardOffChannelTokens and t.channel != self.channel:
694ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                discard = True
695ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
696ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            # is there a channel override for token type?
697ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            try:
698ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                overrideChannel = self.channelOverrideMap[t.type]
699ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
700ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            except KeyError:
701ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                # no override for this type
702ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                pass
703ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
704ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            else:
705ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                if overrideChannel == self.channel:
706ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                    t.channel = overrideChannel
707ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                else:
708ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                    discard = True
709ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
710ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            if not discard:
711ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                t.index = index
712ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                self.tokens.append(t)
713ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                index += 1
714ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
715ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            t = self.tokenSource.nextToken()
716ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
717ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        # leave p pointing at first token on channel
718ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.p = 0
719ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.p = self.skipOffTokenChannels(self.p)
720ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
721ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
722ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def consume(self):
723ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
724ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Move the input pointer to the next incoming token.  The stream
725ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        must become active with LT(1) available.  consume() simply
726ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        moves the input pointer so that LT(1) points at the next
727ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        input symbol. Consume at least one token.
728ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
729ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Walk past any token not on the channel the parser is listening to.
730ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
731ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
732ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if self.p < len(self.tokens):
733ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            self.p += 1
734ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
735ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            self.p = self.skipOffTokenChannels(self.p) # leave p on valid token
736ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
737ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
738ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def skipOffTokenChannels(self, i):
739ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
740ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Given a starting index, return the index of the first on-channel
741ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        token.
742ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
743ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
744ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        try:
745ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            while self.tokens[i].channel != self.channel:
746ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov                i += 1
747ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        except IndexError:
748ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            # hit the end of token stream
749ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            pass
750ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
751ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return i
752ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
753ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
754ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def skipOffTokenChannelsReverse(self, i):
755ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        while i >= 0 and self.tokens[i].channel != self.channel:
756ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            i -= 1
757ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
758ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        return i
759ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
760ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
761ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def setTokenTypeChannel(self, ttype, channel):
762ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
763ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        A simple filter mechanism whereby you can tell this token stream
764ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        to force all tokens of type ttype to be on channel.  For example,
765ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        when interpreting, we cannot exec actions so we need to tell
766ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        the stream to force all WS and NEWLINE to be a different, ignored
767ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        channel.
768ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov	"""
769ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
770ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.channelOverrideMap[ttype] = channel
771ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
772ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
773ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def discardTokenType(self, ttype):
774ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        self.discardSet.add(ttype)
775ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
776ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
777ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov    def getTokens(self, start=None, stop=None, types=None):
778ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
779ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        Given a start and stop index, return a list of all tokens in
780ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        the token type set.  Return None if no tokens were found.  This
781ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        method looks at both on and off channel tokens.
782ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        """
783ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
784ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if self.p == -1:
785ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov            self.fillBuffer()
786ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov
787ee451cb395940862dad63c85adfe8f2fd55e864cSvet Ganov        if stop is None or stop >= len(self.tokens):
788            stop = len(self.tokens) - 1
789
790        if start is None or stop < 0:
791            start = 0
792
793        if start > stop:
794            return None
795
796        if isinstance(types, (int, long)):
797            # called with a single type, wrap into set
798            types = set([types])
799
800        filteredTokens = [
801            token for token in self.tokens[start:stop]
802            if types is None or token.type in types
803            ]
804
805        if len(filteredTokens) == 0:
806            return None
807
808        return filteredTokens
809
810
811    def LT(self, k):
812        """
813        Get the ith token from the current position 1..n where k=1 is the
814        first symbol of lookahead.
815        """
816
817        if self.p == -1:
818            self.fillBuffer()
819
820        if k == 0:
821            return None
822
823        if k < 0:
824            return self.LB(-k)
825
826        i = self.p
827        n = 1
828        # find k good tokens
829        while n < k:
830            # skip off-channel tokens
831            i = self.skipOffTokenChannels(i+1) # leave p on valid token
832            n += 1
833
834        if i > self._range:
835            self._range = i
836
837        try:
838            return self.tokens[i]
839        except IndexError:
840            return self.makeEOFToken()
841
842
843    def LB(self, k):
844        """Look backwards k tokens on-channel tokens"""
845
846        if self.p == -1:
847            self.fillBuffer()
848
849        if k == 0:
850            return None
851
852        if self.p - k < 0:
853            return None
854
855        i = self.p
856        n = 1
857        # find k good tokens looking backwards
858        while n <= k:
859            # skip off-channel tokens
860            i = self.skipOffTokenChannelsReverse(i-1) # leave p on valid token
861            n += 1
862
863        if i < 0:
864            return None
865
866        return self.tokens[i]
867
868
869    def get(self, i):
870        """
871        Return absolute token i; ignore which channel the tokens are on;
872        that is, count all tokens not just on-channel tokens.
873        """
874
875        return self.tokens[i]
876
877
878    def slice(self, start, stop):
879        if self.p == -1:
880            self.fillBuffer()
881
882        if start < 0 or stop < 0:
883            return None
884
885        return self.tokens[start:stop+1]
886
887
888    def LA(self, i):
889        return self.LT(i).type
890
891
892    def mark(self):
893        self.lastMarker = self.index()
894        return self.lastMarker
895
896
897    def release(self, marker=None):
898        # no resources to release
899        pass
900
901
902    def size(self):
903        return len(self.tokens)
904
905
906    def range(self):
907        return self._range
908
909
910    def index(self):
911        return self.p
912
913
914    def rewind(self, marker=None):
915        if marker is None:
916            marker = self.lastMarker
917
918        self.seek(marker)
919
920
921    def seek(self, index):
922        self.p = index
923
924
925    def getTokenSource(self):
926        return self.tokenSource
927
928
929    def getSourceName(self):
930        return self.tokenSource.getSourceName()
931
932
933    def toString(self, start=None, stop=None):
934        if self.p == -1:
935            self.fillBuffer()
936
937        if start is None:
938            start = 0
939        elif not isinstance(start, int):
940            start = start.index
941
942        if stop is None:
943            stop = len(self.tokens) - 1
944        elif not isinstance(stop, int):
945            stop = stop.index
946
947        if stop >= len(self.tokens):
948            stop = len(self.tokens) - 1
949
950        return ''.join([t.text for t in self.tokens[start:stop+1]])
951
952
953class RewriteOperation(object):
954    """@brief Internal helper class."""
955
956    def __init__(self, stream, index, text):
957        self.stream = stream
958
959        # What index into rewrites List are we?
960        self.instructionIndex = None
961
962        # Token buffer index.
963        self.index = index
964        self.text = text
965
966    def execute(self, buf):
967        """Execute the rewrite operation by possibly adding to the buffer.
968        Return the index of the next token to operate on.
969        """
970
971        return self.index
972
973    def toString(self):
974        opName = self.__class__.__name__
975        return '<%s@%d:"%s">' % (
976            opName, self.index, self.text)
977
978    __str__ = toString
979    __repr__ = toString
980
981
982class InsertBeforeOp(RewriteOperation):
983    """@brief Internal helper class."""
984
985    def execute(self, buf):
986        buf.write(self.text)
987        if self.stream.tokens[self.index].type != EOF:
988            buf.write(self.stream.tokens[self.index].text)
989        return self.index + 1
990
991
992class ReplaceOp(RewriteOperation):
993    """
994    @brief Internal helper class.
995
996    I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
997    instructions.
998    """
999
1000    def __init__(self, stream, first, last, text):
1001        RewriteOperation.__init__(self, stream, first, text)
1002        self.lastIndex = last
1003
1004
1005    def execute(self, buf):
1006        if self.text is not None:
1007            buf.write(self.text)
1008
1009        return self.lastIndex + 1
1010
1011
1012    def toString(self):
1013        if self.text is None:
1014            return '<DeleteOp@%d..%d>' % (self.index, self.lastIndex)
1015
1016        return '<ReplaceOp@%d..%d:"%s">' % (
1017            self.index, self.lastIndex, self.text)
1018
1019    __str__ = toString
1020    __repr__ = toString
1021
1022
1023class TokenRewriteStream(CommonTokenStream):
1024    """@brief CommonTokenStream that can be modified.
1025
1026    Useful for dumping out the input stream after doing some
1027    augmentation or other manipulations.
1028
1029    You can insert stuff, replace, and delete chunks.  Note that the
1030    operations are done lazily--only if you convert the buffer to a
1031    String.  This is very efficient because you are not moving data around
1032    all the time.  As the buffer of tokens is converted to strings, the
1033    toString() method(s) check to see if there is an operation at the
1034    current index.  If so, the operation is done and then normal String
1035    rendering continues on the buffer.  This is like having multiple Turing
1036    machine instruction streams (programs) operating on a single input tape. :)
1037
1038    Since the operations are done lazily at toString-time, operations do not
1039    screw up the token index values.  That is, an insert operation at token
1040    index i does not change the index values for tokens i+1..n-1.
1041
1042    Because operations never actually alter the buffer, you may always get
1043    the original token stream back without undoing anything.  Since
1044    the instructions are queued up, you can easily simulate transactions and
1045    roll back any changes if there is an error just by removing instructions.
1046    For example,
1047
1048     CharStream input = new ANTLRFileStream("input");
1049     TLexer lex = new TLexer(input);
1050     TokenRewriteStream tokens = new TokenRewriteStream(lex);
1051     T parser = new T(tokens);
1052     parser.startRule();
1053
1054     Then in the rules, you can execute
1055        Token t,u;
1056        ...
1057        input.insertAfter(t, "text to put after t");}
1058        input.insertAfter(u, "text after u");}
1059        System.out.println(tokens.toString());
1060
1061    Actually, you have to cast the 'input' to a TokenRewriteStream. :(
1062
1063    You can also have multiple "instruction streams" and get multiple
1064    rewrites from a single pass over the input.  Just name the instruction
1065    streams and use that name again when printing the buffer.  This could be
1066    useful for generating a C file and also its header file--all from the
1067    same buffer:
1068
1069        tokens.insertAfter("pass1", t, "text to put after t");}
1070        tokens.insertAfter("pass2", u, "text after u");}
1071        System.out.println(tokens.toString("pass1"));
1072        System.out.println(tokens.toString("pass2"));
1073
1074    If you don't use named rewrite streams, a "default" stream is used as
1075    the first example shows.
1076    """
1077
1078    DEFAULT_PROGRAM_NAME = "default"
1079    MIN_TOKEN_INDEX = 0
1080
1081    def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
1082        CommonTokenStream.__init__(self, tokenSource, channel)
1083
1084        # You may have multiple, named streams of rewrite operations.
1085        # I'm calling these things "programs."
1086        #  Maps String (name) -> rewrite (List)
1087        self.programs = {}
1088        self.programs[self.DEFAULT_PROGRAM_NAME] = []
1089
1090 	# Map String (program name) -> Integer index
1091        self.lastRewriteTokenIndexes = {}
1092
1093
1094    def rollback(self, *args):
1095        """
1096        Rollback the instruction stream for a program so that
1097        the indicated instruction (via instructionIndex) is no
1098        longer in the stream.  UNTESTED!
1099        """
1100
1101        if len(args) == 2:
1102            programName = args[0]
1103            instructionIndex = args[1]
1104        elif len(args) == 1:
1105            programName = self.DEFAULT_PROGRAM_NAME
1106            instructionIndex = args[0]
1107        else:
1108            raise TypeError("Invalid arguments")
1109
1110        p = self.programs.get(programName, None)
1111        if p is not None:
1112            self.programs[programName] = (
1113                p[self.MIN_TOKEN_INDEX:instructionIndex])
1114
1115
1116    def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME):
1117        """Reset the program so that no instructions exist"""
1118
1119        self.rollback(programName, self.MIN_TOKEN_INDEX)
1120
1121
1122    def insertAfter(self, *args):
1123        if len(args) == 2:
1124            programName = self.DEFAULT_PROGRAM_NAME
1125            index = args[0]
1126            text = args[1]
1127
1128        elif len(args) == 3:
1129            programName = args[0]
1130            index = args[1]
1131            text = args[2]
1132
1133        else:
1134            raise TypeError("Invalid arguments")
1135
1136        if isinstance(index, Token):
1137            # index is a Token, grap the stream index from it
1138            index = index.index
1139
1140        # to insert after, just insert before next index (even if past end)
1141        self.insertBefore(programName, index+1, text)
1142
1143
1144    def insertBefore(self, *args):
1145        if len(args) == 2:
1146            programName = self.DEFAULT_PROGRAM_NAME
1147            index = args[0]
1148            text = args[1]
1149
1150        elif len(args) == 3:
1151            programName = args[0]
1152            index = args[1]
1153            text = args[2]
1154
1155        else:
1156            raise TypeError("Invalid arguments")
1157
1158        if isinstance(index, Token):
1159            # index is a Token, grap the stream index from it
1160            index = index.index
1161
1162        op = InsertBeforeOp(self, index, text)
1163        rewrites = self.getProgram(programName)
1164        op.instructionIndex = len(rewrites)
1165        rewrites.append(op)
1166
1167
1168    def replace(self, *args):
1169        if len(args) == 2:
1170            programName = self.DEFAULT_PROGRAM_NAME
1171            first = args[0]
1172            last = args[0]
1173            text = args[1]
1174
1175        elif len(args) == 3:
1176            programName = self.DEFAULT_PROGRAM_NAME
1177            first = args[0]
1178            last = args[1]
1179            text = args[2]
1180
1181        elif len(args) == 4:
1182            programName = args[0]
1183            first = args[1]
1184            last = args[2]
1185            text = args[3]
1186
1187        else:
1188            raise TypeError("Invalid arguments")
1189
1190        if isinstance(first, Token):
1191            # first is a Token, grap the stream index from it
1192            first = first.index
1193
1194        if isinstance(last, Token):
1195            # last is a Token, grap the stream index from it
1196            last = last.index
1197
1198        if first > last or first < 0 or last < 0 or last >= len(self.tokens):
1199            raise ValueError(
1200                "replace: range invalid: %d..%d (size=%d)"
1201                % (first, last, len(self.tokens)))
1202
1203        op = ReplaceOp(self, first, last, text)
1204        rewrites = self.getProgram(programName)
1205        op.instructionIndex = len(rewrites)
1206        rewrites.append(op)
1207
1208
1209    def delete(self, *args):
1210        self.replace(*(list(args) + [None]))
1211
1212
1213    def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME):
1214        return self.lastRewriteTokenIndexes.get(programName, -1)
1215
1216
1217    def setLastRewriteTokenIndex(self, programName, i):
1218        self.lastRewriteTokenIndexes[programName] = i
1219
1220
1221    def getProgram(self, name):
1222        p = self.programs.get(name, None)
1223        if p is  None:
1224            p = self.initializeProgram(name)
1225
1226        return p
1227
1228
1229    def initializeProgram(self, name):
1230        p = []
1231        self.programs[name] = p
1232        return p
1233
1234
1235    def toOriginalString(self, start=None, end=None):
1236        if self.p == -1:
1237            self.fillBuffer()
1238
1239        if start is None:
1240            start = self.MIN_TOKEN_INDEX
1241        if end is None:
1242            end = self.size() - 1
1243
1244        buf = StringIO()
1245        i = start
1246        while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
1247            if self.get(i).type != EOF:
1248                buf.write(self.get(i).text)
1249            i += 1
1250
1251        return buf.getvalue()
1252
1253
1254    def toString(self, *args):
1255        if self.p == -1:
1256            self.fillBuffer()
1257
1258        if len(args) == 0:
1259            programName = self.DEFAULT_PROGRAM_NAME
1260            start = self.MIN_TOKEN_INDEX
1261            end = self.size() - 1
1262
1263        elif len(args) == 1:
1264            programName = args[0]
1265            start = self.MIN_TOKEN_INDEX
1266            end = self.size() - 1
1267
1268        elif len(args) == 2:
1269            programName = self.DEFAULT_PROGRAM_NAME
1270            start = args[0]
1271            end = args[1]
1272
1273        if start is None:
1274            start = self.MIN_TOKEN_INDEX
1275        elif not isinstance(start, int):
1276            start = start.index
1277
1278        if end is None:
1279            end = len(self.tokens) - 1
1280        elif not isinstance(end, int):
1281            end = end.index
1282
1283        # ensure start/end are in range
1284        if end >= len(self.tokens):
1285            end = len(self.tokens) - 1
1286
1287        if start < 0:
1288            start = 0
1289
1290        rewrites = self.programs.get(programName)
1291        if rewrites is None or len(rewrites) == 0:
1292            # no instructions to execute
1293            return self.toOriginalString(start, end)
1294
1295        buf = StringIO()
1296
1297        # First, optimize instruction stream
1298        indexToOp = self.reduceToSingleOperationPerIndex(rewrites)
1299
1300        # Walk buffer, executing instructions and emitting tokens
1301        i = start
1302        while i <= end and i < len(self.tokens):
1303            op = indexToOp.get(i)
1304            # remove so any left have index size-1
1305            try:
1306                del indexToOp[i]
1307            except KeyError:
1308                pass
1309
1310            t = self.tokens[i]
1311            if op is None:
1312                # no operation at that index, just dump token
1313                if t.type != EOF:
1314                    buf.write(t.text)
1315                i += 1 # move to next token
1316
1317            else:
1318                i = op.execute(buf) # execute operation and skip
1319
1320        # include stuff after end if it's last index in buffer
1321        # So, if they did an insertAfter(lastValidIndex, "foo"), include
1322        # foo if end==lastValidIndex.
1323        if end == len(self.tokens) - 1:
1324            # Scan any remaining operations after last token
1325            # should be included (they will be inserts).
1326            for i in sorted(indexToOp.keys()):
1327                op = indexToOp[i]
1328                if op.index >= len(self.tokens)-1:
1329                    buf.write(op.text)
1330
1331        return buf.getvalue()
1332
1333    __str__ = toString
1334
1335
1336    def reduceToSingleOperationPerIndex(self, rewrites):
1337        """
1338        We need to combine operations and report invalid operations (like
1339        overlapping replaces that are not completed nested).  Inserts to
1340        same index need to be combined etc...   Here are the cases:
1341
1342        I.i.u I.j.v                           leave alone, nonoverlapping
1343        I.i.u I.i.v                           combine: Iivu
1344
1345        R.i-j.u R.x-y.v | i-j in x-y          delete first R
1346        R.i-j.u R.i-j.v                       delete first R
1347        R.i-j.u R.x-y.v | x-y in i-j          ERROR
1348        R.i-j.u R.x-y.v | boundaries overlap  ERROR
1349
1350        Delete special case of replace (text==null):
1351        D.i-j.u D.x-y.v |                     boundaries overlapcombine to
1352                                              max(min)..max(right)
1353
1354        I.i.u R.x-y.v   |                     i in (x+1)-ydelete I (since
1355                                              insert before we're not deleting
1356                                              i)
1357        I.i.u R.x-y.v   |                     i not in (x+1)-yleave alone,
1358                                              nonoverlapping
1359
1360        R.x-y.v I.i.u   | i in x-y            ERROR
1361        R.x-y.v I.x.u                         R.x-y.uv (combine, delete I)
1362        R.x-y.v I.i.u   | i not in x-y        leave alone, nonoverlapping
1363
1364        I.i.u = insert u before op @ index i
1365        R.x-y.u = replace x-y indexed tokens with u
1366
1367        First we need to examine replaces.  For any replace op:
1368
1369          1. wipe out any insertions before op within that range.
1370          2. Drop any replace op before that is contained completely within
1371             that range.
1372          3. Throw exception upon boundary overlap with any previous replace.
1373
1374        Then we can deal with inserts:
1375
1376          1. for any inserts to same index, combine even if not adjacent.
1377          2. for any prior replace with same left boundary, combine this
1378             insert with replace and delete this replace.
1379          3. throw exception if index in same range as previous replace
1380
1381        Don't actually delete; make op null in list. Easier to walk list.
1382        Later we can throw as we add to index -> op map.
1383
1384        Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
1385        inserted stuff would be before the replace range.  But, if you
1386        add tokens in front of a method body '{' and then delete the method
1387        body, I think the stuff before the '{' you added should disappear too.
1388
1389        Return a map from token index to operation.
1390        """
1391
1392        # WALK REPLACES
1393        for i, rop in enumerate(rewrites):
1394            if rop is None:
1395                continue
1396
1397            if not isinstance(rop, ReplaceOp):
1398                continue
1399
1400            # Wipe prior inserts within range
1401            for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
1402                if iop.index == rop.index:
1403                    # E.g., insert before 2, delete 2..2; update replace
1404                    # text to include insert before, kill insert
1405                    rewrites[iop.instructionIndex] = None
1406                    rop.text = self.catOpText(iop.text, rop.text)
1407
1408                elif iop.index > rop.index and iop.index <= rop.lastIndex:
1409                    # delete insert as it's a no-op.
1410                    rewrites[j] = None
1411
1412            # Drop any prior replaces contained within
1413            for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i):
1414                if (prevRop.index >= rop.index
1415                    and prevRop.lastIndex <= rop.lastIndex):
1416                    # delete replace as it's a no-op.
1417                    rewrites[j] = None
1418                    continue
1419
1420                # throw exception unless disjoint or identical
1421                disjoint = (prevRop.lastIndex < rop.index
1422                            or prevRop.index > rop.lastIndex)
1423                same = (prevRop.index == rop.index
1424                        and prevRop.lastIndex == rop.lastIndex)
1425
1426                # Delete special case of replace (text==null):
1427                # D.i-j.u D.x-y.v| boundaries overlapcombine to
1428                # max(min)..max(right)
1429                if prevRop.text is None and rop.text is None and not disjoint:
1430                    # kill first delete
1431                    rewrites[prevRop.instructionIndex] = None
1432
1433                    rop.index = min(prevRop.index, rop.index)
1434                    rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex)
1435
1436                elif not disjoint and not same:
1437                    raise ValueError(
1438                        "replace op boundaries of %s overlap with previous %s"
1439                        % (rop, prevRop))
1440
1441        # WALK INSERTS
1442        for i, iop in enumerate(rewrites):
1443            if iop is None:
1444                continue
1445
1446            if not isinstance(iop, InsertBeforeOp):
1447                continue
1448
1449            # combine current insert with prior if any at same index
1450            for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
1451                if prevIop.index == iop.index: # combine objects
1452                    # convert to strings...we're in process of toString'ing
1453                    # whole token buffer so no lazy eval issue with any
1454                    # templates
1455                    iop.text = self.catOpText(iop.text, prevIop.text)
1456                    # delete redundant prior insert
1457                    rewrites[j] = None
1458
1459            # look for replaces where iop.index is in range; error
1460            for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i):
1461                if iop.index == rop.index:
1462                    rop.text = self.catOpText(iop.text, rop.text)
1463                    # delete current insert
1464                    rewrites[i] = None
1465                    continue
1466
1467                if iop.index >= rop.index and iop.index <= rop.lastIndex:
1468                    raise ValueError(
1469                        "insert op %s within boundaries of previous %s"
1470                        % (iop, rop))
1471
1472        m = {}
1473        for i, op in enumerate(rewrites):
1474            if op is None:
1475                # ignore deleted ops
1476                continue
1477
1478            assert op.index not in m, "should only be one op per index"
1479            m[op.index] = op
1480
1481        return m
1482
1483
1484    def catOpText(self, a, b):
1485        x = ""
1486        y = ""
1487        if a is not None:
1488            x = a
1489        if b is not None:
1490            y = b
1491        return x + y
1492
1493
1494    def getKindOfOps(self, rewrites, kind, before=None):
1495        """Get all operations before an index of a particular kind."""
1496
1497        if before is None:
1498            before = len(rewrites)
1499        elif before > len(rewrites):
1500            before = len(rewrites)
1501
1502        for i, op in enumerate(rewrites[:before]):
1503            if op is None:
1504                # ignore deleted
1505                continue
1506            if op.__class__ == kind:
1507                yield i, op
1508
1509
1510    def toDebugString(self, start=None, end=None):
1511        if start is None:
1512            start = self.MIN_TOKEN_INDEX
1513        if end is None:
1514            end = self.size() - 1
1515
1516        buf = StringIO()
1517        i = start
1518        while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
1519            buf.write(self.get(i))
1520            i += 1
1521
1522        return buf.getvalue()
1523