Label.java revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.analysis;
29
30import org.antlr.misc.IntSet;
31import org.antlr.misc.IntervalSet;
32import org.antlr.tool.Grammar;
33
34/** A state machine transition label.  A label can be either a simple
35 *  label such as a token or character.  A label can be a set of char or
36 *  tokens.  It can be an epsilon transition.  It can be a semantic predicate
37 *  (which assumes an epsilon transition) or a tree of predicates (in a DFA).
38 *  Special label types have to be < 0 to avoid conflict with char.
39 */
40public class Label implements Comparable, Cloneable {
41    public static final int INVALID = -7;
42
43	public static final int ACTION = -6;
44
45	public static final int EPSILON = -5;
46
47    public static final String EPSILON_STR = "<EPSILON>";
48
49    /** label is a semantic predicate; implies label is epsilon also */
50    public static final int SEMPRED = -4;
51
52    /** label is a set of tokens or char */
53    public static final int SET = -3;
54
55    /** End of Token is like EOF for lexer rules.  It implies that no more
56     *  characters are available and that NFA conversion should terminate
57     *  for this path.  For example
58     *
59     *  A : 'a' 'b' | 'a' ;
60     *
61     *  yields a DFA predictor:
62     *
63     *  o-a->o-b->1   predict alt 1
64     *       |
65     *       |-EOT->o predict alt 2
66     *
67     *  To generate code for EOT, treat it as the "default" path, which
68     *  implies there is no way to mismatch a char for the state from
69     *  which the EOT emanates.
70     */
71    public static final int EOT = -2;
72
73    public static final int EOF = -1;
74
75	/** We have labels like EPSILON that are below 0; it's hard to
76	 *  store them in an array with negative index so use this
77	 *  constant as an index shift when accessing arrays based upon
78	 *  token type.  If real token type is i, then array index would be
79	 *  NUM_FAUX_LABELS + i.
80	 */
81	public static final int NUM_FAUX_LABELS = -INVALID;
82
83    /** Anything at this value or larger can be considered a simple atom int
84     *  for easy comparison during analysis only; faux labels are not used
85	 *  during parse time for real token types or char values.
86     */
87    public static final int MIN_ATOM_VALUE = EOT;
88
89    // TODO: is 0 a valid unicode char? max is FFFF -1, right?
90    public static final int MIN_CHAR_VALUE = '\u0000';
91    public static final int MAX_CHAR_VALUE = '\uFFFF';
92
93	/** End of rule token type; imaginary token type used only for
94	 *  local, partial FOLLOW sets to indicate that the local FOLLOW
95	 *  hit the end of rule.  During error recovery, the local FOLLOW
96	 *  of a token reference may go beyond the end of the rule and have
97	 *  to use FOLLOW(rule).  I have to just shift the token types to 2..n
98	 *  rather than 1..n to accommodate this imaginary token in my bitsets.
99	 *  If I didn't use a bitset implementation for runtime sets, I wouldn't
100	 *  need this.  EOF is another candidate for a run time token type for
101	 *  parsers.  Follow sets are not computed for lexers so we do not have
102	 *  this issue.
103	 */
104	public static final int EOR_TOKEN_TYPE =
105		org.antlr.runtime.Token.EOR_TOKEN_TYPE;
106
107	public static final int DOWN = org.antlr.runtime.Token.DOWN;
108	public static final int UP = org.antlr.runtime.Token.UP;
109
110    /** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */
111	public static final int MIN_TOKEN_TYPE =
112		org.antlr.runtime.Token.MIN_TOKEN_TYPE;
113
114    /** The wildcard '.' char atom implies all valid characters==UNICODE */
115    //public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE);
116
117    /** The token type or character value; or, signifies special label. */
118    protected int label;
119
120    /** A set of token types or character codes if label==SET */
121	// TODO: try IntervalSet for everything
122    protected IntSet labelSet;
123
124    public Label(int label) {
125        this.label = label;
126    }
127
128    /** Make a set label */
129    public Label(IntSet labelSet) {
130		if ( labelSet==null ) {
131			this.label = SET;
132			this.labelSet = IntervalSet.of(INVALID);
133			return;
134		}
135		int singleAtom = labelSet.getSingleElement();
136        if ( singleAtom!=INVALID ) {
137            // convert back to a single atomic element if |labelSet|==1
138            label = singleAtom;
139            return;
140        }
141        this.label = SET;
142        this.labelSet = labelSet;
143    }
144
145	public Object clone() {
146		Label l;
147		try {
148			l = (Label)super.clone();
149			l.label = this.label;
150            l.labelSet = new IntervalSet();
151			l.labelSet.addAll(this.labelSet);
152		}
153		catch (CloneNotSupportedException e) {
154			throw new InternalError();
155		}
156		return l;
157	}
158
159	public void add(Label a) {
160		if ( isAtom() ) {
161			labelSet = IntervalSet.of(label);
162			label=SET;
163			if ( a.isAtom() ) {
164				labelSet.add(a.getAtom());
165			}
166			else if ( a.isSet() ) {
167				labelSet.addAll(a.getSet());
168			}
169			else {
170				throw new IllegalStateException("can't add element to Label of type "+label);
171			}
172			return;
173		}
174		if ( isSet() ) {
175			if ( a.isAtom() ) {
176				labelSet.add(a.getAtom());
177			}
178			else if ( a.isSet() ) {
179				labelSet.addAll(a.getSet());
180			}
181			else {
182				throw new IllegalStateException("can't add element to Label of type "+label);
183			}
184			return;
185		}
186		throw new IllegalStateException("can't add element to Label of type "+label);
187	}
188
189    public boolean isAtom() {
190        return label>=MIN_ATOM_VALUE;
191    }
192
193    public boolean isEpsilon() {
194        return label==EPSILON;
195    }
196
197	public boolean isSemanticPredicate() {
198		return false;
199	}
200
201	public boolean isAction() {
202		return false;
203	}
204
205    public boolean isSet() {
206        return label==SET;
207    }
208
209    /** return the single atom label or INVALID if not a single atom */
210    public int getAtom() {
211        if ( isAtom() ) {
212            return label;
213        }
214        return INVALID;
215    }
216
217    public IntSet getSet() {
218        if ( label!=SET ) {
219            // convert single element to a set if they ask for it.
220            return IntervalSet.of(label);
221        }
222        return labelSet;
223    }
224
225    public void setSet(IntSet set) {
226        label=SET;
227        labelSet = set;
228    }
229
230    public SemanticContext getSemanticContext() {
231        return null;
232    }
233
234	public boolean matches(int atom) {
235		if ( label==atom ) {
236			return true; // handle the single atom case efficiently
237		}
238		if ( isSet() ) {
239			return labelSet.member(atom);
240		}
241		return false;
242	}
243
244	public boolean matches(IntSet set) {
245		if ( isAtom() ) {
246			return set.member(getAtom());
247		}
248		if ( isSet() ) {
249			// matches if intersection non-nil
250			return !getSet().and(set).isNil();
251		}
252		return false;
253	}
254
255
256	public boolean matches(Label other) {
257		if ( other.isSet() ) {
258			return matches(other.getSet());
259		}
260		if ( other.isAtom() ) {
261			return matches(other.getAtom());
262		}
263		return false;
264	}
265
266    public int hashCode() {
267        if (label==SET) {
268            return labelSet.hashCode();
269		}
270		else {
271			return label;
272		}
273	}
274
275	// TODO: do we care about comparing set {A} with atom A? Doesn't now.
276	public boolean equals(Object o) {
277		if ( o==null ) {
278			return false;
279		}
280		if ( this == o ) {
281			return true; // equals if same object
282		}
283		// labels must be the same even if epsilon or set or sempred etc...
284        if ( label!=((Label)o).label ) {
285            return false;
286        }
287		if ( label==SET ) {
288			return this.labelSet.equals(((Label)o).labelSet);
289		}
290		return true;  // label values are same, so true
291    }
292
293    public int compareTo(Object o) {
294        return this.label-((Label)o).label;
295    }
296
297    /** Predicates are lists of AST nodes from the NFA created from the
298     *  grammar, but the same predicate could be cut/paste into multiple
299     *  places in the grammar.  I must compare the text of all the
300     *  predicates to truly answer whether {p1,p2} .equals {p1,p2}.
301     *  Unfortunately, I cannot rely on the AST.equals() to work properly
302     *  so I must do a brute force O(n^2) nested traversal of the Set
303     *  doing a String compare.
304     *
305     *  At this point, Labels are not compared for equals when they are
306     *  predicates, but here's the code for future use.
307     */
308    /*
309    protected boolean predicatesEquals(Set others) {
310        Iterator iter = semanticContext.iterator();
311        while (iter.hasNext()) {
312            AST predAST = (AST) iter.next();
313            Iterator inner = semanticContext.iterator();
314            while (inner.hasNext()) {
315                AST otherPredAST = (AST) inner.next();
316                if ( !predAST.getText().equals(otherPredAST.getText()) ) {
317                    return false;
318                }
319            }
320        }
321        return true;
322    }
323      */
324
325    public String toString() {
326        switch (label) {
327            case SET :
328                return labelSet.toString();
329            default :
330                return String.valueOf(label);
331        }
332    }
333
334    public String toString(Grammar g) {
335        switch (label) {
336            case SET :
337                return labelSet.toString(g);
338            default :
339                return g.getTokenDisplayName(label);
340        }
341    }
342
343    /*
344    public String predicatesToString() {
345        if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) {
346            return "!other preds";
347        }
348        StringBuffer buf = new StringBuffer();
349        Iterator iter = semanticContext.iterator();
350        while (iter.hasNext()) {
351            AST predAST = (AST) iter.next();
352            buf.append(predAST.getText());
353            if ( iter.hasNext() ) {
354                buf.append("&");
355            }
356        }
357        return buf.toString();
358    }
359    */
360
361	public static boolean intersect(Label label, Label edgeLabel) {
362		boolean hasIntersection = false;
363		boolean labelIsSet = label.isSet();
364		boolean edgeIsSet = edgeLabel.isSet();
365		if ( !labelIsSet && !edgeIsSet && edgeLabel.label==label.label ) {
366			hasIntersection = true;
367		}
368		else if ( labelIsSet && edgeIsSet &&
369				  !edgeLabel.getSet().and(label.getSet()).isNil() ) {
370			hasIntersection = true;
371		}
372		else if ( labelIsSet && !edgeIsSet &&
373				  label.getSet().member(edgeLabel.label) ) {
374			hasIntersection = true;
375		}
376		else if ( !labelIsSet && edgeIsSet &&
377				  edgeLabel.getSet().member(label.label) ) {
378			hasIntersection = true;
379		}
380		return hasIntersection;
381	}
382}
383