TreeWizard.cs revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/*
2 * [The "BSD licence"]
3 * Copyright (c) 2005-2008 Terence Parr
4 * All rights reserved.
5 *
6 * Conversion to C#:
7 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33// TODO: build indexes for wizard
34//#define BUILD_INDEXES
35
36namespace Antlr.Runtime.Tree {
37    using System.Collections.Generic;
38
39    using IList = System.Collections.IList;
40#if BUILD_INDEXES
41    using IDictionary = System.Collections.IDictionary;
42#endif
43
44    /** <summary>
45     *  Build and navigate trees with this object.  Must know about the names
46     *  of tokens so you have to pass in a map or array of token names (from which
47     *  this class can build the map).  I.e., Token DECL means nothing unless the
48     *  class can translate it to a token type.
49     *  </summary>
50     *
51     *  <remarks>
52     *  In order to create nodes and navigate, this class needs a TreeAdaptor.
53     *
54     *  This class can build a token type -> node index for repeated use or for
55     *  iterating over the various nodes with a particular type.
56     *
57     *  This class works in conjunction with the TreeAdaptor rather than moving
58     *  all this functionality into the adaptor.  An adaptor helps build and
59     *  navigate trees using methods.  This class helps you do it with string
60     *  patterns like "(A B C)".  You can create a tree from that pattern or
61     *  match subtrees against it.
62     *  </remarks>
63     */
64    public class TreeWizard {
65        protected ITreeAdaptor adaptor;
66        protected IDictionary<string, int> tokenNameToTypeMap;
67
68        public interface IContextVisitor {
69            // TODO: should this be called visit or something else?
70            void Visit(object t, object parent, int childIndex, IDictionary<string, object> labels);
71        }
72
73        public abstract class Visitor : IContextVisitor {
74            public virtual void Visit(object t, object parent, int childIndex, IDictionary<string, object> labels) {
75                Visit(t);
76            }
77            public abstract void Visit(object t);
78        }
79
80        class ActionVisitor : Visitor {
81            System.Action<object> _action;
82
83            public ActionVisitor(System.Action<object> action) {
84                _action = action;
85            }
86
87            public override void Visit(object t) {
88                _action(t);
89            }
90        }
91
92        /** <summary>
93         *  When using %label:TOKENNAME in a tree for parse(), we must
94         *  track the label.
95         *  </summary>
96         */
97        public class TreePattern : CommonTree {
98            public string label;
99            public bool hasTextArg;
100            public TreePattern(IToken payload) :
101                base(payload) {
102            }
103            public override string ToString() {
104                if (label != null) {
105                    return "%" + label + ":"; //+ base.ToString();
106                } else {
107                    return base.ToString();
108                }
109            }
110        }
111
112        public class WildcardTreePattern : TreePattern {
113            public WildcardTreePattern(IToken payload) :
114                base(payload) {
115            }
116        }
117
118        /** <summary>This adaptor creates TreePattern objects for use during scan()</summary> */
119        public class TreePatternTreeAdaptor : CommonTreeAdaptor {
120            public override object Create(IToken payload) {
121                return new TreePattern(payload);
122            }
123        }
124
125#if BUILD_INDEXES
126        // TODO: build indexes for the wizard
127
128        /** <summary>
129         *  During fillBuffer(), we can make a reverse index from a set
130         *  of token types of interest to the list of indexes into the
131         *  node stream.  This lets us convert a node pointer to a
132         *  stream index semi-efficiently for a list of interesting
133         *  nodes such as function definition nodes (you'll want to seek
134         *  to their bodies for an interpreter).  Also useful for doing
135         *  dynamic searches; i.e., go find me all PLUS nodes.
136         *  </summary>
137         */
138        protected IDictionary<int, IList<int>> tokenTypeToStreamIndexesMap;
139
140        /** <summary>
141         *  If tokenTypesToReverseIndex set to INDEX_ALL then indexing
142         *  occurs for all token types.
143         *  </summary>
144         */
145        public static readonly HashSet<int> INDEX_ALL = new HashSet<int>();
146
147        /** <summary>
148         *  A set of token types user would like to index for faster lookup.
149         *  If this is INDEX_ALL, then all token types are tracked.  If null,
150         *  then none are indexed.
151         *  </summary>
152         */
153        protected HashSet<int> tokenTypesToReverseIndex = null;
154#endif
155
156        public TreeWizard(ITreeAdaptor adaptor) {
157            this.adaptor = adaptor;
158        }
159
160        public TreeWizard(ITreeAdaptor adaptor, IDictionary<string, int> tokenNameToTypeMap) {
161            this.adaptor = adaptor;
162            this.tokenNameToTypeMap = tokenNameToTypeMap;
163        }
164
165        public TreeWizard(ITreeAdaptor adaptor, string[] tokenNames) {
166            this.adaptor = adaptor;
167            this.tokenNameToTypeMap = ComputeTokenTypes(tokenNames);
168        }
169
170        public TreeWizard(string[] tokenNames) :
171            this(null, tokenNames) {
172        }
173
174        /** <summary>
175         *  Compute a Map&lt;String, Integer&gt; that is an inverted index of
176         *  tokenNames (which maps int token types to names).
177         *  </summary>
178         */
179        public virtual IDictionary<string, int> ComputeTokenTypes(string[] tokenNames) {
180            IDictionary<string, int> m = new Dictionary<string, int>();
181            if (tokenNames == null) {
182                return m;
183            }
184            for (int ttype = TokenTypes.Min; ttype < tokenNames.Length; ttype++) {
185                string name = tokenNames[ttype];
186                m[name] = ttype;
187            }
188            return m;
189        }
190
191        /** <summary>Using the map of token names to token types, return the type.</summary> */
192        public virtual int GetTokenType(string tokenName) {
193            if (tokenNameToTypeMap == null) {
194                return TokenTypes.Invalid;
195            }
196
197            int value;
198            if (tokenNameToTypeMap.TryGetValue(tokenName, out value))
199                return value;
200
201            return TokenTypes.Invalid;
202        }
203
204        /** <summary>
205         *  Walk the entire tree and make a node name to nodes mapping.
206         *  For now, use recursion but later nonrecursive version may be
207         *  more efficient.  Returns Map&lt;Integer, List&gt; where the List is
208         *  of your AST node type.  The Integer is the token type of the node.
209         *  </summary>
210         *
211         *  <remarks>
212         *  TODO: save this index so that find and visit are faster
213         *  </remarks>
214         */
215        public IDictionary<int, IList> Index(object t) {
216            IDictionary<int, IList> m = new Dictionary<int, IList>();
217            IndexCore(t, m);
218            return m;
219        }
220
221        /** <summary>Do the work for index</summary> */
222        protected virtual void IndexCore(object t, IDictionary<int, IList> m) {
223            if (t == null) {
224                return;
225            }
226            int ttype = adaptor.GetType(t);
227            IList elements;
228            if (!m.TryGetValue(ttype, out elements) || elements == null) {
229                elements = new List<object>();
230                m[ttype] = elements;
231            }
232            elements.Add(t);
233            int n = adaptor.GetChildCount(t);
234            for (int i = 0; i < n; i++) {
235                object child = adaptor.GetChild(t, i);
236                IndexCore(child, m);
237            }
238        }
239
240        class FindTreeWizardVisitor : TreeWizard.Visitor {
241            IList _nodes;
242            public FindTreeWizardVisitor(IList nodes) {
243                _nodes = nodes;
244            }
245            public override void Visit(object t) {
246                _nodes.Add(t);
247            }
248        }
249        class FindTreeWizardContextVisitor : TreeWizard.IContextVisitor {
250            TreeWizard _outer;
251            TreePattern _tpattern;
252            IList _subtrees;
253            public FindTreeWizardContextVisitor(TreeWizard outer, TreePattern tpattern, IList subtrees) {
254                _outer = outer;
255                _tpattern = tpattern;
256                _subtrees = subtrees;
257            }
258
259            public void Visit(object t, object parent, int childIndex, IDictionary<string, object> labels) {
260                if (_outer.ParseCore(t, _tpattern, null)) {
261                    _subtrees.Add(t);
262                }
263            }
264        }
265
266        /** <summary>Return a List of tree nodes with token type ttype</summary> */
267        public virtual IList Find(object t, int ttype) {
268            IList nodes = new List<object>();
269            Visit(t, ttype, new FindTreeWizardVisitor(nodes));
270            return nodes;
271        }
272
273        /** <summary>Return a List of subtrees matching pattern.</summary> */
274        public virtual IList Find(object t, string pattern) {
275            IList subtrees = new List<object>();
276            // Create a TreePattern from the pattern
277            TreePatternLexer tokenizer = new TreePatternLexer(pattern);
278            TreePatternParser parser =
279                new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor());
280            TreePattern tpattern = (TreePattern)parser.Pattern();
281            // don't allow invalid patterns
282            if (tpattern == null ||
283                 tpattern.IsNil ||
284                 tpattern.GetType() == typeof(WildcardTreePattern)) {
285                return null;
286            }
287            int rootTokenType = tpattern.Type;
288            Visit(t, rootTokenType, new FindTreeWizardContextVisitor(this, tpattern, subtrees));
289            return subtrees;
290        }
291
292        public virtual object FindFirst(object t, int ttype) {
293            return null;
294        }
295
296        public virtual object FindFirst(object t, string pattern) {
297            return null;
298        }
299
300        /** <summary>
301         *  Visit every ttype node in t, invoking the visitor.  This is a quicker
302         *  version of the general visit(t, pattern) method.  The labels arg
303         *  of the visitor action method is never set (it's null) since using
304         *  a token type rather than a pattern doesn't let us set a label.
305         *  </summary>
306         */
307        public void Visit(object t, int ttype, IContextVisitor visitor) {
308            VisitCore(t, null, 0, ttype, visitor);
309        }
310
311        public void Visit(object t, int ttype, System.Action<object> action) {
312            Visit(t, ttype, new ActionVisitor(action));
313        }
314
315        /** <summary>Do the recursive work for visit</summary> */
316        protected virtual void VisitCore(object t, object parent, int childIndex, int ttype, IContextVisitor visitor) {
317            if (t == null) {
318                return;
319            }
320            if (adaptor.GetType(t) == ttype) {
321                visitor.Visit(t, parent, childIndex, null);
322            }
323            int n = adaptor.GetChildCount(t);
324            for (int i = 0; i < n; i++) {
325                object child = adaptor.GetChild(t, i);
326                VisitCore(child, t, i, ttype, visitor);
327            }
328        }
329
330        class VisitTreeWizardContextVisitor : TreeWizard.IContextVisitor {
331            TreeWizard _outer;
332            IContextVisitor _visitor;
333            IDictionary<string, object> _labels;
334            TreePattern _tpattern;
335
336            public VisitTreeWizardContextVisitor(TreeWizard outer, IContextVisitor visitor, IDictionary<string, object> labels, TreePattern tpattern) {
337                _outer = outer;
338                _visitor = visitor;
339                _labels = labels;
340                _tpattern = tpattern;
341            }
342
343            public void Visit(object t, object parent, int childIndex, IDictionary<string, object> unusedlabels) {
344                // the unusedlabels arg is null as visit on token type doesn't set.
345                _labels.Clear();
346                if (_outer.ParseCore(t, _tpattern, _labels)) {
347                    _visitor.Visit(t, parent, childIndex, _labels);
348                }
349            }
350        }
351
352        /** <summary>
353         *  For all subtrees that match the pattern, execute the visit action.
354         *  The implementation uses the root node of the pattern in combination
355         *  with visit(t, ttype, visitor) so nil-rooted patterns are not allowed.
356         *  Patterns with wildcard roots are also not allowed.
357         *  </summary>
358         */
359        public void Visit(object t, string pattern, IContextVisitor visitor) {
360            // Create a TreePattern from the pattern
361            TreePatternLexer tokenizer = new TreePatternLexer(pattern);
362            TreePatternParser parser =
363                new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor());
364            TreePattern tpattern = (TreePattern)parser.Pattern();
365            // don't allow invalid patterns
366            if (tpattern == null ||
367                 tpattern.IsNil ||
368                 tpattern.GetType() == typeof(WildcardTreePattern)) {
369                return;
370            }
371            IDictionary<string, object> labels = new Dictionary<string, object>(); // reused for each _parse
372            int rootTokenType = tpattern.Type;
373            Visit(t, rootTokenType, new VisitTreeWizardContextVisitor(this, visitor, labels, tpattern));
374        }
375
376        /** <summary>
377         *  Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels
378         *  on the various nodes and '.' (dot) as the node/subtree wildcard,
379         *  return true if the pattern matches and fill the labels Map with
380         *  the labels pointing at the appropriate nodes.  Return false if
381         *  the pattern is malformed or the tree does not match.
382         *  </summary>
383         *
384         *  <remarks>
385         *  If a node specifies a text arg in pattern, then that must match
386         *  for that node in t.
387         *
388         *  TODO: what's a better way to indicate bad pattern? Exceptions are a hassle
389         *  </remarks>
390         */
391        public bool Parse(object t, string pattern, IDictionary<string, object> labels) {
392            TreePatternLexer tokenizer = new TreePatternLexer(pattern);
393            TreePatternParser parser =
394                new TreePatternParser(tokenizer, this, new TreePatternTreeAdaptor());
395            TreePattern tpattern = (TreePattern)parser.Pattern();
396            /*
397            System.out.println("t="+((Tree)t).toStringTree());
398            System.out.println("scant="+tpattern.toStringTree());
399            */
400            bool matched = ParseCore(t, tpattern, labels);
401            return matched;
402        }
403
404        public bool Parse(object t, string pattern) {
405            return Parse(t, pattern, null);
406        }
407
408        /** <summary>
409         *  Do the work for parse. Check to see if the t2 pattern fits the
410         *  structure and token types in t1.  Check text if the pattern has
411         *  text arguments on nodes.  Fill labels map with pointers to nodes
412         *  in tree matched against nodes in pattern with labels.
413         *  </summary>
414         */
415        protected virtual bool ParseCore(object t1, TreePattern tpattern, IDictionary<string, object> labels) {
416            // make sure both are non-null
417            if (t1 == null || tpattern == null) {
418                return false;
419            }
420            // check roots (wildcard matches anything)
421            if (tpattern.GetType() != typeof(WildcardTreePattern)) {
422                if (adaptor.GetType(t1) != tpattern.Type) {
423                    return false;
424                }
425                // if pattern has text, check node text
426                if (tpattern.hasTextArg && !adaptor.GetText(t1).Equals(tpattern.Text)) {
427                    return false;
428                }
429            }
430            if (tpattern.label != null && labels != null) {
431                // map label in pattern to node in t1
432                labels[tpattern.label] = t1;
433            }
434            // check children
435            int n1 = adaptor.GetChildCount(t1);
436            int n2 = tpattern.ChildCount;
437            if (n1 != n2) {
438                return false;
439            }
440            for (int i = 0; i < n1; i++) {
441                object child1 = adaptor.GetChild(t1, i);
442                TreePattern child2 = (TreePattern)tpattern.GetChild(i);
443                if (!ParseCore(child1, child2, labels)) {
444                    return false;
445                }
446            }
447            return true;
448        }
449
450        /** <summary>
451         *  Create a tree or node from the indicated tree pattern that closely
452         *  follows ANTLR tree grammar tree element syntax:
453         *
454         *      (root child1 ... child2).
455         *  </summary>
456         *
457         *  <remarks>
458         *  You can also just pass in a node: ID
459         *
460         *  Any node can have a text argument: ID[foo]
461         *  (notice there are no quotes around foo--it's clear it's a string).
462         *
463         *  nil is a special name meaning "give me a nil node".  Useful for
464         *  making lists: (nil A B C) is a list of A B C.
465         *  </remarks>
466         */
467        public virtual object Create(string pattern) {
468            TreePatternLexer tokenizer = new TreePatternLexer(pattern);
469            TreePatternParser parser = new TreePatternParser(tokenizer, this, adaptor);
470            object t = parser.Pattern();
471            return t;
472        }
473
474        /** <summary>
475         *  Compare t1 and t2; return true if token types/text, structure match exactly.
476         *  The trees are examined in their entirety so that (A B) does not match
477         *  (A B C) nor (A (B C)).
478         *  </summary>
479         *
480         *  <remarks>
481         *  TODO: allow them to pass in a comparator
482         *  TODO: have a version that is nonstatic so it can use instance adaptor
483         *
484         *  I cannot rely on the tree node's equals() implementation as I make
485         *  no constraints at all on the node types nor interface etc...
486         *  </remarks>
487         */
488        public static bool Equals(object t1, object t2, ITreeAdaptor adaptor) {
489            return EqualsCore(t1, t2, adaptor);
490        }
491
492        /** <summary>
493         *  Compare type, structure, and text of two trees, assuming adaptor in
494         *  this instance of a TreeWizard.
495         *  </summary>
496         */
497        public new bool Equals(object t1, object t2) {
498            return EqualsCore(t1, t2, adaptor);
499        }
500
501        protected static bool EqualsCore(object t1, object t2, ITreeAdaptor adaptor) {
502            // make sure both are non-null
503            if (t1 == null || t2 == null) {
504                return false;
505            }
506            // check roots
507            if (adaptor.GetType(t1) != adaptor.GetType(t2)) {
508                return false;
509            }
510            if (!adaptor.GetText(t1).Equals(adaptor.GetText(t2))) {
511                return false;
512            }
513            // check children
514            int n1 = adaptor.GetChildCount(t1);
515            int n2 = adaptor.GetChildCount(t2);
516            if (n1 != n2) {
517                return false;
518            }
519            for (int i = 0; i < n1; i++) {
520                object child1 = adaptor.GetChild(t1, i);
521                object child2 = adaptor.GetChild(t2, i);
522                if (!EqualsCore(child1, child2, adaptor)) {
523                    return false;
524                }
525            }
526            return true;
527        }
528
529#if BUILD_INDEXES
530        // TODO: next stuff taken from CommonTreeNodeStream
531
532        /** <summary>
533         *  Given a node, add this to the reverse index tokenTypeToStreamIndexesMap.
534         *  You can override this method to alter how indexing occurs.  The
535         *  default is to create a
536         *
537         *    Map&lt;Integer token type,ArrayList&lt;Integer stream index&gt;&gt;
538         *  </summary>
539         *
540         *  <remarks>
541         *  This data structure allows you to find all nodes with type INT in order.
542         *
543         *  If you really need to find a node of type, say, FUNC quickly then perhaps
544         *
545         *    Map&lt;Integertoken type,Map&lt;Object tree node,Integer stream index&gt;&gt;
546         *
547         *  would be better for you.  The interior maps map a tree node to
548         *  the index so you don't have to search linearly for a specific node.
549         *
550         *  If you change this method, you will likely need to change
551         *  getNodeIndex(), which extracts information.
552         *  </remarks>
553         */
554        protected void fillReverseIndex( object node, int streamIndex )
555        {
556            //System.out.println("revIndex "+node+"@"+streamIndex);
557            if ( tokenTypesToReverseIndex == null )
558            {
559                return; // no indexing if this is empty (nothing of interest)
560            }
561            if ( tokenTypeToStreamIndexesMap == null )
562            {
563                tokenTypeToStreamIndexesMap = new Dictionary<int, IList<int>>(); // first indexing op
564            }
565            int tokenType = adaptor.getType( node );
566            if ( !( tokenTypesToReverseIndex == INDEX_ALL ||
567                   tokenTypesToReverseIndex.Contains( tokenType ) ) )
568            {
569                return; // tokenType not of interest
570            }
571            IList<int> indexes;
572
573            if ( !tokenTypeToStreamIndexesMap.TryGetValue( tokenType, out indexes ) || indexes == null )
574            {
575                indexes = new List<int>(); // no list yet for this token type
576                indexes.Add( streamIndex ); // not there yet, add
577                tokenTypeToStreamIndexesMap[tokenType] = indexes;
578            }
579            else
580            {
581                if ( !indexes.Contains( streamIndex ) )
582                {
583                    indexes.Add( streamIndex ); // not there yet, add
584                }
585            }
586        }
587
588        /** <summary>
589         *  Track the indicated token type in the reverse index.  Call this
590         *  repeatedly for each type or use variant with Set argument to
591         *  set all at once.
592         *  </summary>
593         *
594         *  <param name="tokenType" />
595         */
596        public void reverseIndex( int tokenType )
597        {
598            if ( tokenTypesToReverseIndex == null )
599            {
600                tokenTypesToReverseIndex = new HashSet<int>();
601            }
602            else if ( tokenTypesToReverseIndex == INDEX_ALL )
603            {
604                return;
605            }
606            tokenTypesToReverseIndex.add( tokenType );
607        }
608
609        /** <summary>
610         *  Track the indicated token types in the reverse index. Set
611         *  to INDEX_ALL to track all token types.
612         *  </summary>
613         */
614        public void reverseIndex( HashSet<int> tokenTypes )
615        {
616            tokenTypesToReverseIndex = tokenTypes;
617        }
618
619        /** <summary>
620         *  Given a node pointer, return its index into the node stream.
621         *  This is not its Token stream index.  If there is no reverse map
622         *  from node to stream index or the map does not contain entries
623         *  for node's token type, a linear search of entire stream is used.
624         *  </summary>
625         *
626         *  <remarks>
627         *  Return -1 if exact node pointer not in stream.
628         *  </remarks>
629         */
630        public int getNodeIndex( object node )
631        {
632            //System.out.println("get "+node);
633            if ( tokenTypeToStreamIndexesMap == null )
634            {
635                return getNodeIndexLinearly( node );
636            }
637            int tokenType = adaptor.getType( node );
638            IList<int> indexes;
639            if ( !tokenTypeToStreamIndexesMap.TryGetValue( tokenType, out indexes ) || indexes == null )
640            {
641                //System.out.println("found linearly; stream index = "+getNodeIndexLinearly(node));
642                return getNodeIndexLinearly( node );
643            }
644            for ( int i = 0; i < indexes.size(); i++ )
645            {
646                int streamIndex = indexes[i];
647                object n = get( streamIndex );
648                if ( n == node )
649                {
650                    //System.out.println("found in index; stream index = "+streamIndexI);
651                    return streamIndex; // found it!
652                }
653            }
654            return -1;
655        }
656#endif
657
658    }
659}
660