1/*
2 [The "BSD license"]
3 Copyright (c) 2005-2009 Terence Parr
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10     notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12     notice, this list of conditions and the following disclaimer in the
13     documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15     derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.runtime.tree;
29
30import org.antlr.runtime.Token;
31
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.List;
35import java.util.Map;
36
37/** Build and navigate trees with this object.  Must know about the names
38 *  of tokens so you have to pass in a map or array of token names (from which
39 *  this class can build the map).  I.e., Token DECL means nothing unless the
40 *  class can translate it to a token type.
41 *
42 *  In order to create nodes and navigate, this class needs a TreeAdaptor.
43 *
44 *  This class can build a token type -> node index for repeated use or for
45 *  iterating over the various nodes with a particular type.
46 *
47 *  This class works in conjunction with the TreeAdaptor rather than moving
48 *  all this functionality into the adaptor.  An adaptor helps build and
49 *  navigate trees using methods.  This class helps you do it with string
50 *  patterns like "(A B C)".  You can create a tree from that pattern or
51 *  match subtrees against it.
52 */
53public class TreeWizard {
54	protected TreeAdaptor adaptor;
55	protected Map tokenNameToTypeMap;
56
57	public interface ContextVisitor {
58		// TODO: should this be called visit or something else?
59		public void visit(Object t, Object parent, int childIndex, Map labels);
60	}
61
62	public static abstract class Visitor implements ContextVisitor {
63		public void visit(Object t, Object parent, int childIndex, Map labels) {
64			visit(t);
65		}
66		public abstract void visit(Object t);
67	}
68
69	/** When using %label:TOKENNAME in a tree for parse(), we must
70	 *  track the label.
71	 */
72	public static class TreePattern extends CommonTree {
73		public String label;
74		public boolean hasTextArg;
75		public TreePattern(Token payload) {
76			super(payload);
77		}
78		public String toString() {
79			if ( label!=null ) {
80				return "%"+label+":"+super.toString();
81			}
82			else {
83				return super.toString();
84			}
85		}
86	}
87
88	public static class WildcardTreePattern extends TreePattern {
89		public WildcardTreePattern(Token payload) {
90			super(payload);
91		}
92	}
93
94	/** This adaptor creates TreePattern objects for use during scan() */
95	public static class TreePatternTreeAdaptor extends CommonTreeAdaptor {
96		public Object create(Token payload) {
97			return new TreePattern(payload);
98		}
99	}
100
101	// TODO: build indexes for the wizard
102
103	/** During fillBuffer(), we can make a reverse index from a set
104	 *  of token types of interest to the list of indexes into the
105	 *  node stream.  This lets us convert a node pointer to a
106	 *  stream index semi-efficiently for a list of interesting
107	 *  nodes such as function definition nodes (you'll want to seek
108	 *  to their bodies for an interpreter).  Also useful for doing
109	 *  dynamic searches; i.e., go find me all PLUS nodes.
110	protected Map tokenTypeToStreamIndexesMap;
111
112	/** If tokenTypesToReverseIndex set to INDEX_ALL then indexing
113	 *  occurs for all token types.
114	public static final Set INDEX_ALL = new HashSet();
115
116	/** A set of token types user would like to index for faster lookup.
117	 *  If this is INDEX_ALL, then all token types are tracked.  If null,
118	 *  then none are indexed.
119	protected Set tokenTypesToReverseIndex = null;
120	*/
121
122	public TreeWizard(TreeAdaptor adaptor) {
123		this.adaptor = adaptor;
124	}
125
126	public TreeWizard(TreeAdaptor adaptor, Map tokenNameToTypeMap) {
127		this.adaptor = adaptor;
128		this.tokenNameToTypeMap = tokenNameToTypeMap;
129	}
130
131	public TreeWizard(TreeAdaptor adaptor, String[] tokenNames) {
132		this.adaptor = adaptor;
133		this.tokenNameToTypeMap = computeTokenTypes(tokenNames);
134	}
135
136	public TreeWizard(String[] tokenNames) {
137		this(new CommonTreeAdaptor(), tokenNames);
138	}
139
140	/** Compute a Map<String, Integer> that is an inverted index of
141	 *  tokenNames (which maps int token types to names).
142	 */
143	public Map computeTokenTypes(String[] tokenNames) {
144		Map m = new HashMap();
145		if ( tokenNames==null ) {
146			return m;
147		}
148		for (int ttype = Token.MIN_TOKEN_TYPE; ttype < tokenNames.length; ttype++) {
149			String name = tokenNames[ttype];
150			m.put(name, new Integer(ttype));
151		}
152		return m;
153	}
154
155	/** Using the map of token names to token types, return the type. */
156	public int getTokenType(String tokenName) {
157	 	if ( tokenNameToTypeMap==null ) {
158			 return Token.INVALID_TOKEN_TYPE;
159		 }
160		Integer ttypeI = (Integer)tokenNameToTypeMap.get(tokenName);
161		if ( ttypeI!=null ) {
162			return ttypeI.intValue();
163		}
164		return Token.INVALID_TOKEN_TYPE;
165	}
166
167	/** Walk the entire tree and make a node name to nodes mapping.
168	 *  For now, use recursion but later nonrecursive version may be
169	 *  more efficient.  Returns Map<Integer, List> where the List is
170	 *  of your AST node type.  The Integer is the token type of the node.
171	 *
172	 *  TODO: save this index so that find and visit are faster
173	 */
174	public Map index(Object t) {
175		Map m = new HashMap();
176		_index(t, m);
177		return m;
178	}
179
180	/** Do the work for index */
181	protected void _index(Object t, Map m) {
182		if ( t==null ) {
183			return;
184		}
185		int ttype = adaptor.getType(t);
186		List elements = (List)m.get(new Integer(ttype));
187		if ( elements==null ) {
188			elements = new ArrayList();
189			m.put(new Integer(ttype), elements);
190		}
191		elements.add(t);
192		int n = adaptor.getChildCount(t);
193		for (int i=0; i<n; i++) {
194			Object child = adaptor.getChild(t, i);
195			_index(child, m);
196		}
197	}
198
199	/** Return a List of tree nodes with token type ttype */
200	public List find(Object t, int ttype) {
201		final List nodes = new ArrayList();
202		visit(t, ttype, new TreeWizard.Visitor() {
203			public void visit(Object t) {
204				nodes.add(t);
205			}
206		});
207		return nodes;
208	}
209
210	/** Return a List of subtrees matching pattern. */
211	public List find(Object t, String pattern) {
212		final List subtrees = new ArrayList();
213		// Create a TreePattern from the pattern
214		TreePatternLexer tokenizer = new TreePatternLexer(pattern);
215		TreePatternParser parser =
216			new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor());
217		final TreePattern tpattern = (TreePattern)parser.pattern();
218		// don't allow invalid patterns
219		if ( tpattern==null ||
220			 tpattern.isNil() ||
221			 tpattern.getClass()==WildcardTreePattern.class )
222		{
223			return null;
224		}
225		int rootTokenType = tpattern.getType();
226		visit(t, rootTokenType, new TreeWizard.ContextVisitor() {
227			public void visit(Object t, Object parent, int childIndex, Map labels) {
228				if ( _parse(t, tpattern, null) ) {
229					subtrees.add(t);
230				}
231			}
232		});
233		return subtrees;
234	}
235
236	public Object findFirst(Object t, int ttype) {
237		return null;
238	}
239
240	public Object findFirst(Object t, String pattern) {
241		return null;
242	}
243
244	/** Visit every ttype node in t, invoking the visitor.  This is a quicker
245	 *  version of the general visit(t, pattern) method.  The labels arg
246	 *  of the visitor action method is never set (it's null) since using
247	 *  a token type rather than a pattern doesn't let us set a label.
248	 */
249	public void visit(Object t, int ttype, ContextVisitor visitor) {
250		_visit(t, null, 0, ttype, visitor);
251	}
252
253	/** Do the recursive work for visit */
254	protected void _visit(Object t, Object parent, int childIndex, int ttype, ContextVisitor visitor) {
255		if ( t==null ) {
256			return;
257		}
258		if ( adaptor.getType(t)==ttype ) {
259			visitor.visit(t, parent, childIndex, null);
260		}
261		int n = adaptor.getChildCount(t);
262		for (int i=0; i<n; i++) {
263			Object child = adaptor.getChild(t, i);
264			_visit(child, t, i, ttype, visitor);
265		}
266	}
267
268	/** For all subtrees that match the pattern, execute the visit action.
269	 *  The implementation uses the root node of the pattern in combination
270	 *  with visit(t, ttype, visitor) so nil-rooted patterns are not allowed.
271	 *  Patterns with wildcard roots are also not allowed.
272	 */
273	public void visit(Object t, final String pattern, final ContextVisitor visitor) {
274		// Create a TreePattern from the pattern
275		TreePatternLexer tokenizer = new TreePatternLexer(pattern);
276		TreePatternParser parser =
277			new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor());
278		final TreePattern tpattern = (TreePattern)parser.pattern();
279		// don't allow invalid patterns
280		if ( tpattern==null ||
281			 tpattern.isNil() ||
282			 tpattern.getClass()==WildcardTreePattern.class )
283		{
284			return;
285		}
286		final Map labels = new HashMap(); // reused for each _parse
287		int rootTokenType = tpattern.getType();
288		visit(t, rootTokenType, new TreeWizard.ContextVisitor() {
289			public void visit(Object t, Object parent, int childIndex, Map unusedlabels) {
290				// the unusedlabels arg is null as visit on token type doesn't set.
291				labels.clear();
292				if ( _parse(t, tpattern, labels) ) {
293					visitor.visit(t, parent, childIndex, labels);
294				}
295			}
296		});
297	}
298
299	/** Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels
300	 *  on the various nodes and '.' (dot) as the node/subtree wildcard,
301	 *  return true if the pattern matches and fill the labels Map with
302	 *  the labels pointing at the appropriate nodes.  Return false if
303	 *  the pattern is malformed or the tree does not match.
304	 *
305	 *  If a node specifies a text arg in pattern, then that must match
306	 *  for that node in t.
307	 *
308	 *  TODO: what's a better way to indicate bad pattern? Exceptions are a hassle
309	 */
310	public boolean parse(Object t, String pattern, Map labels) {
311		TreePatternLexer tokenizer = new TreePatternLexer(pattern);
312		TreePatternParser parser =
313			new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor());
314		TreePattern tpattern = (TreePattern)parser.pattern();
315		/*
316		System.out.println("t="+((Tree)t).toStringTree());
317		System.out.println("scant="+tpattern.toStringTree());
318		*/
319		boolean matched = _parse(t, tpattern, labels);
320		return matched;
321	}
322
323	public boolean parse(Object t, String pattern) {
324		return parse(t, pattern, null);
325	}
326
327	/** Do the work for parse. Check to see if the t2 pattern fits the
328	 *  structure and token types in t1.  Check text if the pattern has
329	 *  text arguments on nodes.  Fill labels map with pointers to nodes
330	 *  in tree matched against nodes in pattern with labels.
331	 */
332	protected boolean _parse(Object t1, TreePattern tpattern, Map labels) {
333		// make sure both are non-null
334		if ( t1==null || tpattern==null ) {
335			return false;
336		}
337		// check roots (wildcard matches anything)
338		if ( tpattern.getClass() != WildcardTreePattern.class ) {
339			if ( adaptor.getType(t1) != tpattern.getType() ) return false;
340            // if pattern has text, check node text
341			if ( tpattern.hasTextArg && !adaptor.getText(t1).equals(tpattern.getText()) ) {
342				return false;
343			}
344		}
345		if ( tpattern.label!=null && labels!=null ) {
346			// map label in pattern to node in t1
347			labels.put(tpattern.label, t1);
348		}
349		// check children
350		int n1 = adaptor.getChildCount(t1);
351		int n2 = tpattern.getChildCount();
352		if ( n1 != n2 ) {
353			return false;
354		}
355		for (int i=0; i<n1; i++) {
356			Object child1 = adaptor.getChild(t1, i);
357			TreePattern child2 = (TreePattern)tpattern.getChild(i);
358			if ( !_parse(child1, child2, labels) ) {
359				return false;
360			}
361		}
362		return true;
363	}
364
365	/** Create a tree or node from the indicated tree pattern that closely
366	 *  follows ANTLR tree grammar tree element syntax:
367	 *
368	 * 		(root child1 ... child2).
369	 *
370	 *  You can also just pass in a node: ID
371	 *
372	 *  Any node can have a text argument: ID[foo]
373	 *  (notice there are no quotes around foo--it's clear it's a string).
374	 *
375	 *  nil is a special name meaning "give me a nil node".  Useful for
376	 *  making lists: (nil A B C) is a list of A B C.
377 	 */
378	public Object create(String pattern) {
379		TreePatternLexer tokenizer = new TreePatternLexer(pattern);
380		TreePatternParser parser = new TreePatternParser(tokenizer, this, adaptor);
381		Object t = parser.pattern();
382		return t;
383	}
384
385	/** Compare t1 and t2; return true if token types/text, structure match exactly.
386	 *  The trees are examined in their entirety so that (A B) does not match
387	 *  (A B C) nor (A (B C)).
388	 // TODO: allow them to pass in a comparator
389	 *  TODO: have a version that is nonstatic so it can use instance adaptor
390	 *
391	 *  I cannot rely on the tree node's equals() implementation as I make
392	 *  no constraints at all on the node types nor interface etc...
393	 */
394	public static boolean equals(Object t1, Object t2, TreeAdaptor adaptor) {
395		return _equals(t1, t2, adaptor);
396	}
397
398	/** Compare type, structure, and text of two trees, assuming adaptor in
399	 *  this instance of a TreeWizard.
400	 */
401	public boolean equals(Object t1, Object t2) {
402		return _equals(t1, t2, adaptor);
403	}
404
405	protected static boolean _equals(Object t1, Object t2, TreeAdaptor adaptor) {
406		// make sure both are non-null
407		if ( t1==null || t2==null ) {
408			return false;
409		}
410		// check roots
411		if ( adaptor.getType(t1) != adaptor.getType(t2) ) {
412			return false;
413		}
414		if ( !adaptor.getText(t1).equals(adaptor.getText(t2)) ) {
415			return false;
416		}
417		// check children
418		int n1 = adaptor.getChildCount(t1);
419		int n2 = adaptor.getChildCount(t2);
420		if ( n1 != n2 ) {
421			return false;
422		}
423		for (int i=0; i<n1; i++) {
424			Object child1 = adaptor.getChild(t1, i);
425			Object child2 = adaptor.getChild(t2, i);
426			if ( !_equals(child1, child2, adaptor) ) {
427				return false;
428			}
429		}
430		return true;
431	}
432
433	// TODO: next stuff taken from CommonTreeNodeStream
434
435		/** Given a node, add this to the reverse index tokenTypeToStreamIndexesMap.
436	 *  You can override this method to alter how indexing occurs.  The
437	 *  default is to create a
438	 *
439	 *    Map<Integer token type,ArrayList<Integer stream index>>
440	 *
441	 *  This data structure allows you to find all nodes with type INT in order.
442	 *
443	 *  If you really need to find a node of type, say, FUNC quickly then perhaps
444	 *
445	 *    Map<Integertoken type,Map<Object tree node,Integer stream index>>
446	 *
447	 *  would be better for you.  The interior maps map a tree node to
448	 *  the index so you don't have to search linearly for a specific node.
449	 *
450	 *  If you change this method, you will likely need to change
451	 *  getNodeIndex(), which extracts information.
452	protected void fillReverseIndex(Object node, int streamIndex) {
453		//System.out.println("revIndex "+node+"@"+streamIndex);
454		if ( tokenTypesToReverseIndex==null ) {
455			return; // no indexing if this is empty (nothing of interest)
456		}
457		if ( tokenTypeToStreamIndexesMap==null ) {
458			tokenTypeToStreamIndexesMap = new HashMap(); // first indexing op
459		}
460		int tokenType = adaptor.getType(node);
461		Integer tokenTypeI = new Integer(tokenType);
462		if ( !(tokenTypesToReverseIndex==INDEX_ALL ||
463			   tokenTypesToReverseIndex.contains(tokenTypeI)) )
464		{
465			return; // tokenType not of interest
466		}
467		Integer streamIndexI = new Integer(streamIndex);
468		ArrayList indexes = (ArrayList)tokenTypeToStreamIndexesMap.get(tokenTypeI);
469		if ( indexes==null ) {
470			indexes = new ArrayList(); // no list yet for this token type
471			indexes.add(streamIndexI); // not there yet, add
472			tokenTypeToStreamIndexesMap.put(tokenTypeI, indexes);
473		}
474		else {
475			if ( !indexes.contains(streamIndexI) ) {
476				indexes.add(streamIndexI); // not there yet, add
477			}
478		}
479	}
480
481	/** Track the indicated token type in the reverse index.  Call this
482	 *  repeatedly for each type or use variant with Set argument to
483	 *  set all at once.
484	 * @param tokenType
485	public void reverseIndex(int tokenType) {
486		if ( tokenTypesToReverseIndex==null ) {
487			tokenTypesToReverseIndex = new HashSet();
488		}
489		else if ( tokenTypesToReverseIndex==INDEX_ALL ) {
490			return;
491		}
492		tokenTypesToReverseIndex.add(new Integer(tokenType));
493	}
494
495	/** Track the indicated token types in the reverse index. Set
496	 *  to INDEX_ALL to track all token types.
497	public void reverseIndex(Set tokenTypes) {
498		tokenTypesToReverseIndex = tokenTypes;
499	}
500
501	/** Given a node pointer, return its index into the node stream.
502	 *  This is not its Token stream index.  If there is no reverse map
503	 *  from node to stream index or the map does not contain entries
504	 *  for node's token type, a linear search of entire stream is used.
505	 *
506	 *  Return -1 if exact node pointer not in stream.
507	public int getNodeIndex(Object node) {
508		//System.out.println("get "+node);
509		if ( tokenTypeToStreamIndexesMap==null ) {
510			return getNodeIndexLinearly(node);
511		}
512		int tokenType = adaptor.getType(node);
513		Integer tokenTypeI = new Integer(tokenType);
514		ArrayList indexes = (ArrayList)tokenTypeToStreamIndexesMap.get(tokenTypeI);
515		if ( indexes==null ) {
516			//System.out.println("found linearly; stream index = "+getNodeIndexLinearly(node));
517			return getNodeIndexLinearly(node);
518		}
519		for (int i = 0; i < indexes.size(); i++) {
520			Integer streamIndexI = (Integer)indexes.get(i);
521			Object n = get(streamIndexI.intValue());
522			if ( n==node ) {
523				//System.out.println("found in index; stream index = "+streamIndexI);
524				return streamIndexI.intValue(); // found it!
525			}
526		}
527		return -1;
528	}
529
530	*/
531}
532