TokenRewriteStream.as revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/*
2 [The "BSD licence"]
3 Copyright (c) 2005-2006 Terence Parr
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10    notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15    derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28package org.antlr.runtime {
29	import flash.utils.getQualifiedClassName;
30
31
32	/** Useful for dumping out the input stream after doing some
33	 *  augmentation or other manipulations.
34	 *
35	 *  You can insert stuff, replace, and delete chunks.  Note that the
36	 *  operations are done lazily--only if you convert the buffer to a
37	 *  String.  This is very efficient because you are not moving data around
38	 *  all the time.  As the buffer of tokens is converted to strings, the
39	 *  toString() method(s) check to see if there is an operation at the
40	 *  current index.  If so, the operation is done and then normal String
41	 *  rendering continues on the buffer.  This is like having multiple Turing
42	 *  machine instruction streams (programs) operating on a single input tape. :)
43	 *
44	 *  Since the operations are done lazily at toString-time, operations do not
45	 *  screw up the token index values.  That is, an insert operation at token
46	 *  index i does not change the index values for tokens i+1..n-1.
47	 *
48	 *  Because operations never actually alter the buffer, you may always get
49	 *  the original token stream back without undoing anything.  Since
50	 *  the instructions are queued up, you can easily simulate transactions and
51	 *  roll back any changes if there is an error just by removing instructions.
52	 *  For example,
53	 *
54	 *   var input:CharStream = new ANTLRFileStream("input");
55	 *   var lex:TLexer = new TLexer(input);
56	 *   var tokens:TokenRewriteStream = new TokenRewriteStream(lex);
57	 *   var parser:T = new T(tokens);
58	 *   parser.startRule();
59	 *
60	 * 	 Then in the rules, you can execute
61	 *      var t:Token t, u:Token;
62	 *      ...
63	 *      input.insertAfter(t, "text to put after t");}
64	 * 		input.insertAfter(u, "text after u");}
65	 * 		trace(tokens.toString());
66	 *
67	 *  Actually, you have to cast the 'input' to a TokenRewriteStream. :(
68	 *
69	 *  You can also have multiple "instruction streams" and get multiple
70	 *  rewrites from a single pass over the input.  Just name the instruction
71	 *  streams and use that name again when printing the buffer.  This could be
72	 *  useful for generating a C file and also its header file--all from the
73	 *  same buffer:
74	 *
75	 *      tokens.insertAfter("pass1", t, "text to put after t");}
76	 * 		tokens.insertAfter("pass2", u, "text after u");}
77	 * 		trace(tokens.toString("pass1"));
78	 * 		trace(tokens.toString("pass2"));
79	 *
80	 *  If you don't use named rewrite streams, a "default" stream is used as
81	 *  the first example shows.
82	 */
83	public class TokenRewriteStream extends CommonTokenStream {
84		public static const DEFAULT_PROGRAM_NAME:String = "default";
85		public static const MIN_TOKEN_INDEX:int = 0;
86
87		/** You may have multiple, named streams of rewrite operations.
88		 *  I'm calling these things "programs."
89		 *  Maps String (name) -> rewrite (List)
90		 */
91		protected var programs:Object = new Object();
92
93		/** Map String (program name) -> Integer index */
94		protected var lastRewriteTokenIndexes:Object = new Object();
95
96		public function TokenRewriteStream(tokenSource:TokenSource = null, channel:int = TokenConstants.DEFAULT_CHANNEL) {
97			super(tokenSource, channel);
98			programs[DEFAULT_PROGRAM_NAME] = new Array();
99		}
100
101	    /** Rollback the instruction stream for a program so that
102		 *  the indicated instruction (via instructionIndex) is no
103		 *  longer in the stream.  UNTESTED!
104		 */
105		public function rollback(instructionIndex:int, programName:String = DEFAULT_PROGRAM_NAME):void {
106			var isn:Array = programs[programName] as Array;
107			if ( isn != null ) {
108				programs[programName] = isn.slice(MIN_TOKEN_INDEX,instructionIndex);
109			}
110		}
111
112		/** Reset the program so that no instructions exist */
113		public function deleteProgram(programName:String = DEFAULT_PROGRAM_NAME):void {
114			rollback(MIN_TOKEN_INDEX, programName);
115		}
116
117		public function insertAfterToken(t:Token, text:Object, programName:String = DEFAULT_PROGRAM_NAME):void {
118			insertAfter(t.tokenIndex, text, programName);
119		}
120
121		public function insertAfter(index:int, text:Object, programName:String = DEFAULT_PROGRAM_NAME):void {
122			// to insert after, just insert before next index (even if past end)
123			insertBefore(index+1, text, programName);
124		}
125
126		public function insertBeforeToken(t:Token, text:Object, programName:String = DEFAULT_PROGRAM_NAME):void {
127			insertBefore(t.tokenIndex, text, programName);
128		}
129
130		public function insertBefore(index:int, text:Object, programName:String = DEFAULT_PROGRAM_NAME):void {
131			var op:RewriteOperation = new InsertBeforeOp(index,text);
132			var rewrites:Array = getProgram(programName);
133			op.instructionIndex = rewrites.length;
134			rewrites.push(op);
135		}
136
137		public function replace(index:int, text:Object, programName:String = DEFAULT_PROGRAM_NAME):void {
138			replaceRange(index, index, text, programName);
139		}
140
141		public function replaceRange(fromIndex:int, toIndex:int, text:Object, programName:String = DEFAULT_PROGRAM_NAME):void {
142			if ( fromIndex > toIndex || fromIndex<0 || toIndex<0 || toIndex >= tokens.length ) {
143				throw new Error("replace: range invalid: "+fromIndex+".."+toIndex+"(size="+tokens.length+")");
144			}
145			var op:RewriteOperation = new ReplaceOp(fromIndex, toIndex, text);
146			var rewrites:Array = getProgram(programName);
147			op.instructionIndex = rewrites.length;
148			rewrites.push(op);
149		}
150
151		public function replaceToken(indexT:Token, text:Object, programName:String = DEFAULT_PROGRAM_NAME):void {
152			replaceTokenRange(indexT, indexT, text, programName);
153		}
154
155		public function replaceTokenRange(fromToken:Token, toToken:Token, text:Object, programName:String = DEFAULT_PROGRAM_NAME):void {
156			replaceRange(fromToken.tokenIndex, toToken.tokenIndex, text, programName);
157		}
158
159		public function remove(index:int, programName:String = DEFAULT_PROGRAM_NAME):void {
160			removeRange(index, index, programName);
161		}
162
163		public function removeRange(fromIndex:int, toIndex:int, programName:String = DEFAULT_PROGRAM_NAME):void {
164			replaceRange(fromIndex, toIndex, null, programName);
165		}
166
167		public function removeToken(token:Token, programName:String = DEFAULT_PROGRAM_NAME):void {
168			removeTokenRange(token, token, programName);
169		}
170
171		public function removeTokenRange(fromToken:Token, toToken:Token, programName:String = DEFAULT_PROGRAM_NAME):void {
172			replaceTokenRange(fromToken, toToken, null, programName);
173		}
174
175		public function getLastRewriteTokenIndex(programName:String = DEFAULT_PROGRAM_NAME):int {
176			var i:* = lastRewriteTokenIndexes[programName];
177			if ( i == undefined ) {
178				return -1;
179			}
180			return i as int;
181		}
182
183		protected function setLastRewriteTokenIndex(programName:String, i:int):void {
184			lastRewriteTokenIndexes[programName] = i;
185		}
186
187		protected function getProgram(name:String):Array {
188			var isn:Array = programs[name] as Array;
189			if ( isn==null ) {
190				isn = initializeProgram(name);
191			}
192			return isn;
193		}
194
195		private function initializeProgram(name:String):Array {
196			var isn:Array = new Array();
197			programs[name] =  isn;
198			return isn;
199		}
200
201		public function toOriginalString():String {
202			return toOriginalStringWithRange(MIN_TOKEN_INDEX, size-1);
203		}
204
205		public function toOriginalStringWithRange(start:int, end:int):String {
206			var buf:String = new String();
207			for (var i:int=start; i>=MIN_TOKEN_INDEX && i<=end && i<tokens.length; i++) {
208				buf += getToken(i).text;
209			}
210			return buf.toString();
211		}
212
213		public override function toString():String {
214			return toStringWithRange(MIN_TOKEN_INDEX, size-1);
215		}
216
217		public override function toStringWithRange(start:int, end:int):String {
218			return toStringWithRangeAndProgram(start, end, DEFAULT_PROGRAM_NAME);
219		}
220
221		public function toStringWithRangeAndProgram(start:int, end:int, programName:String):String {
222			var rewrites:Array = programs[programName] as Array;
223
224			// ensure start/end are in range
225	        if ( end > tokens.length-1 ) end = tokens.length-1;
226	        if ( start < 0 ) start = 0;
227
228			if ( rewrites==null || rewrites.length==0 ) {
229				return toOriginalStringWithRange(start,end); // no instructions to execute
230			}
231			var state:RewriteState = new RewriteState();
232			state.tokens = tokens;
233
234			// First, optimize instruction stream
235	        var indexToOp:Array = reduceToSingleOperationPerIndex(rewrites);
236
237	        // Walk buffer, executing instructions and emitting tokens
238	        var i:int = start;
239	        while ( i <= end && i < tokens.length ) {
240	            var op:RewriteOperation = RewriteOperation(indexToOp[i]);
241	            indexToOp[i] = undefined; // remove so any left have index size-1
242	            var t:Token = Token(tokens[i]);
243	            if ( op==null ) {
244	                // no operation at that index, just dump token
245	                state.buf += t.text;
246	                i++; // move to next token
247	            }
248	            else {
249	                i = op.execute(state); // execute operation and skip
250	            }
251	        }
252
253	        // include stuff after end if it's last index in buffer
254	        // So, if they did an insertAfter(lastValidIndex, "foo"), include
255	        // foo if end==lastValidIndex.
256	        if ( end==tokens.length-1 ) {
257	            // Scan any remaining operations after last token
258	            // should be included (they will be inserts).
259	            for each (op in indexToOp) {
260	            	if (op == null) continue;
261	                if ( op.index >= tokens.length-1 ) state.buf += op.text;
262	            }
263	        }
264
265	        return state.buf;
266		}
267
268	    /** We need to combine operations and report invalid operations (like
269	     *  overlapping replaces that are not completed nested).  Inserts to
270	     *  same index need to be combined etc...   Here are the cases:
271	     *
272	     *  I.i.u I.j.v                             leave alone, nonoverlapping
273	     *  I.i.u I.i.v                             combine: Iivu
274	     *
275	     *  R.i-j.u R.x-y.v | i-j in x-y            delete first R
276	     *  R.i-j.u R.i-j.v                         delete first R
277	     *  R.i-j.u R.x-y.v | x-y in i-j            ERROR
278	     *  R.i-j.u R.x-y.v | boundaries overlap    ERROR
279	     *
280	     *  I.i.u R.x-y.v | i in x-y                delete I
281	     *  I.i.u R.x-y.v | i not in x-y            leave alone, nonoverlapping
282	     *  R.x-y.v I.i.u | i in x-y                ERROR
283	     *  R.x-y.v I.x.u                           R.x-y.uv (combine, delete I)
284	     *  R.x-y.v I.i.u | i not in x-y            leave alone, nonoverlapping
285	     *
286	     *  I.i.u = insert u before op @ index i
287	     *  R.x-y.u = replace x-y indexed tokens with u
288	     *
289	     *  First we need to examine replaces.  For any replace op:
290	     *
291	     *      1. wipe out any insertions before op within that range.
292	     *      2. Drop any replace op before that is contained completely within
293	     *         that range.
294	     *      3. Throw exception upon boundary overlap with any previous replace.
295	     *
296	     *  Then we can deal with inserts:
297	     *
298	     *      1. for any inserts to same index, combine even if not adjacent.
299	     *      2. for any prior replace with same left boundary, combine this
300	     *         insert with replace and delete this replace.
301	     *      3. throw exception if index in same range as previous replace
302	     *
303	     *  Don't actually delete; make op null in list. Easier to walk list.
304	     *  Later we can throw as we add to index -> op map.
305	     *
306	     *  Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
307	     *  inserted stuff would be before the replace range.  But, if you
308	     *  add tokens in front of a method body '{' and then delete the method
309	     *  body, I think the stuff before the '{' you added should disappear too.
310	     *
311	     *  Return a map from token index to operation.
312	     */
313	    protected function reduceToSingleOperationPerIndex(rewrites:Array):Array {
314	        //System.out.println("rewrites="+rewrites);
315
316	        // WALK REPLACES
317	        for (var i:int = 0; i < rewrites.length; i++) {
318	            var op:RewriteOperation = RewriteOperation(rewrites[i]);
319	            if ( op==null ) continue;
320	            if ( !(op is ReplaceOp) ) continue;
321	            var rop:ReplaceOp = ReplaceOp(rewrites[i]);
322	            // Wipe prior inserts within range
323	            var inserts:Array = getKindOfOps(rewrites, InsertBeforeOp, i);
324	            for (var j:int = 0; j < inserts.length; j++) {
325	                var iop:InsertBeforeOp = InsertBeforeOp(inserts[j]);
326	                if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) {
327	                    rewrites[iop.instructionIndex] = null;  // delete insert as it's a no-op.
328	                }
329	            }
330	            // Drop any prior replaces contained within
331	            var prevReplaces:Array = getKindOfOps(rewrites, ReplaceOp, i);
332	            for (j = 0; j < prevReplaces.length; j++) {
333	                var prevRop:ReplaceOp = ReplaceOp(prevReplaces[j]);
334	                if ( prevRop.index>=rop.index && prevRop.lastIndex <= rop.lastIndex ) {
335	                    rewrites[prevRop.instructionIndex] = null;  // delete replace as it's a no-op.
336	                    continue;
337	                }
338	                // throw exception unless disjoint or identical
339	                var disjoint:Boolean =
340	                    prevRop.lastIndex<rop.index || prevRop.index > rop.lastIndex;
341	                var same:Boolean =
342	                    prevRop.index==rop.index && prevRop.lastIndex==rop.lastIndex;
343	                if ( !disjoint && !same ) {
344	                    throw new Error("replace op boundaries of "+rop+
345	                                                       " overlap with previous "+prevRop);
346	                }
347	            }
348	        }
349
350	        // WALK INSERTS
351	        for (i = 0; i < rewrites.length; i++) {
352	            op = RewriteOperation(rewrites[i]);
353	            if ( op==null ) continue;
354	            if ( !(op is InsertBeforeOp) ) continue;
355	            iop = InsertBeforeOp(rewrites[i]);
356	            // combine current insert with prior if any at same index
357	            var prevInserts:Array = getKindOfOps(rewrites, InsertBeforeOp, i);
358	            for (j = 0; j < prevInserts.length; j++) {
359	                var prevIop:InsertBeforeOp = InsertBeforeOp(prevInserts[j]);
360	                if ( prevIop.index == iop.index ) { // combine objects
361	                    // convert to strings...we're in process of toString'ing
362	                    // whole token buffer so no lazy eval issue with any templates
363	                    iop.text = catOpText(iop.text,prevIop.text);
364	                    rewrites[prevIop.instructionIndex] = null;  // delete redundant prior insert
365	                }
366	            }
367	            // look for replaces where iop.index is in range; error
368	            prevReplaces = getKindOfOps(rewrites, ReplaceOp, i);
369	            for (j = 0; j < prevReplaces.length; j++) {
370	                rop = ReplaceOp(prevReplaces[j]);
371	                if ( iop.index == rop.index ) {
372	                    rop.text = catOpText(iop.text,rop.text);
373	                    rewrites[i] = null;  // delete current insert
374	                    continue;
375	                }
376	                if ( iop.index >= rop.index && iop.index <= rop.lastIndex ) {
377	                    throw new Error("insert op "+iop+
378	                                                       " within boundaries of previous "+rop);
379	                }
380	            }
381	        }
382	        // System.out.println("rewrites after="+rewrites);
383	        var m:Array = new Array();
384	        for (i = 0; i < rewrites.length; i++) {
385	            op = RewriteOperation(rewrites[i]);
386	            if ( op==null ) continue; // ignore deleted ops
387	            if ( m[op.index] != undefined ) {
388	                throw new Error("should only be one op per index");
389	            }
390	            m[op.index] = op;
391	        }
392	        //System.out.println("index to op: "+m);
393	        return m;
394	    }
395
396	    protected function catOpText(a:Object, b:Object):String {
397	        var x:String = "";
398	        var y:String = "";
399	        if ( a!=null ) x = a.toString();
400	        if ( b!=null ) y = b.toString();
401	        return x+y;
402	    }
403
404	    /** Get all operations before an index of a particular kind */
405	    protected function getKindOfOps(rewrites:Array, kind:Class, before:int = -1):Array {
406	    	if (before == -1) {
407	    		before = rewrites.length;
408	    	}
409	    	var ops:Array = new Array();
410	        for (var i:int=0; i<before && i<rewrites.length; i++) {
411	            var op:RewriteOperation = RewriteOperation(rewrites[i]);
412	            if ( op==null ) continue; // ignore deleted
413	            if ( getQualifiedClassName(op) == getQualifiedClassName(kind) ) ops.push(op);
414	        }
415	        return ops;
416	    }
417
418
419		public function toDebugString():String {
420			return toDebugStringWithRange(MIN_TOKEN_INDEX, size-1);
421		}
422
423		public function toDebugStringWithRange(start:int, end:int):String {
424			var buf:String = new String();
425			for (var i:int=start; i>=MIN_TOKEN_INDEX && i<=end && i<tokens.length; i++) {
426				buf += getToken(i);
427			}
428			return buf;
429		}
430
431
432	}
433}
434	import org.antlr.runtime.Token;
435
436
437// Define the rewrite operation hierarchy
438
439class RewriteState {
440	public var buf:String = new String();
441	public var tokens:Array;
442}
443
444class RewriteOperation {
445	/** What index into rewrites List are we? */
446    internal var instructionIndex:int;
447    /** Token buffer index. */
448	public var index:int;
449	internal var text:Object;
450	public function RewriteOperation(index:int, text:Object) {
451		this.index = index;
452		this.text = text;
453	}
454	/** Execute the rewrite operation by possibly adding to the buffer.
455	 *  Return the index of the next token to operate on.
456	 */
457	public function execute(state:RewriteState):int {
458		return index;
459	}
460}
461
462class InsertBeforeOp extends RewriteOperation {
463	public function InsertBeforeOp(index:int, text:Object) {
464		super(index,text);
465	}
466
467	public override function execute(state:RewriteState):int {
468		state.buf += text;
469		state.buf += Token(state.tokens[index]).text;
470		return index + 1;
471	}
472
473	public function toString():String {
474		return "<InsertBeforeOp@" + index + ":\"" + text + "\">";
475	}
476}
477
478/** I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
479 *  instructions.
480 */
481class ReplaceOp extends RewriteOperation {
482	public var lastIndex:int;
483
484	public function ReplaceOp(fromIndex:int, toIndex:int, text:Object) {
485		super(fromIndex, text);
486		lastIndex = toIndex;
487	}
488
489	public override function execute(state:RewriteState):int {
490		if ( text!=null ) {
491			state.buf += text;
492		}
493		return lastIndex+1;
494	}
495
496	public function toString():String {
497		return "<ReplaceOp@" + index + ".." + lastIndex + ":\"" + text + "\">";
498	}
499}
500
501class DeleteOp extends ReplaceOp {
502	public function DeleteOp(fromIndex:int, toIndex:int) {
503		super(fromIndex, toIndex, null);
504	}
505
506	public override function toString():String {
507		return "<DeleteOp@" + index + ".." + lastIndex + ">";
508	}
509}
510