1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.misc;
29
30import org.antlr.analysis.Label;
31import org.antlr.tool.Grammar;
32
33import java.util.ArrayList;
34import java.util.Iterator;
35import java.util.List;
36import java.util.ListIterator;
37
38/** A set of integers that relies on ranges being common to do
39 *  "run-length-encoded" like compression (if you view an IntSet like
40 *  a BitSet with runs of 0s and 1s).  Only ranges are recorded so that
41 *  a few ints up near value 1000 don't cause massive bitsets, just two
42 *  integer intervals.
43 *
44 *  element values may be negative.  Useful for sets of EPSILON and EOF.
45 *
46 *  0..9 char range is index pair ['\u0030','\u0039'].
47 *  Multiple ranges are encoded with multiple index pairs.  Isolated
48 *  elements are encoded with an index pair where both intervals are the same.
49 *
50 *  The ranges are ordered and disjoint so that 2..6 appears before 101..103.
51 */
52public class IntervalSet implements IntSet {
53	public static final IntervalSet COMPLETE_SET = IntervalSet.of(0,Label.MAX_CHAR_VALUE);
54
55	/** The list of sorted, disjoint intervals. */
56    protected List<Interval> intervals;
57
58	/** Create a set with no elements */
59    public IntervalSet() {
60        intervals = new ArrayList<Interval>(2); // most sets are 1 or 2 elements
61    }
62
63	public IntervalSet(List<Interval> intervals) {
64		this.intervals = intervals;
65	}
66
67	/** Create a set with a single element, el. */
68    public static IntervalSet of(int a) {
69		IntervalSet s = new IntervalSet();
70        s.add(a);
71        return s;
72    }
73
74    /** Create a set with all ints within range [a..b] (inclusive) */
75    public static IntervalSet of(int a, int b) {
76        IntervalSet s = new IntervalSet();
77        s.add(a,b);
78        return s;
79    }
80
81    /** Add a single element to the set.  An isolated element is stored
82     *  as a range el..el.
83     */
84    public void add(int el) {
85        add(el,el);
86    }
87
88    /** Add interval; i.e., add all integers from a to b to set.
89     *  If b<a, do nothing.
90     *  Keep list in sorted order (by left range value).
91     *  If overlap, combine ranges.  For example,
92     *  If this is {1..5, 10..20}, adding 6..7 yields
93     *  {1..5, 6..7, 10..20}.  Adding 4..8 yields {1..8, 10..20}.
94     */
95    public void add(int a, int b) {
96        add(Interval.create(a,b));
97    }
98
99	// copy on write so we can cache a..a intervals and sets of that
100	protected void add(Interval addition) {
101		//System.out.println("add "+addition+" to "+intervals.toString());
102		if ( addition.b<addition.a ) {
103			return;
104		}
105		// find position in list
106		// Use iterators as we modify list in place
107		for (ListIterator iter = intervals.listIterator(); iter.hasNext();) {
108			Interval r = (Interval) iter.next();
109			if ( addition.equals(r) ) {
110				return;
111			}
112			if ( addition.adjacent(r) || !addition.disjoint(r) ) {
113				// next to each other, make a single larger interval
114				Interval bigger = addition.union(r);
115				iter.set(bigger);
116				// make sure we didn't just create an interval that
117				// should be merged with next interval in list
118				if ( iter.hasNext() ) {
119					Interval next = (Interval) iter.next();
120					if ( bigger.adjacent(next)||!bigger.disjoint(next) ) {
121						// if we bump up against or overlap next, merge
122						iter.remove();   // remove this one
123						iter.previous(); // move backwards to what we just set
124						iter.set(bigger.union(next)); // set to 3 merged ones
125					}
126				}
127				return;
128			}
129			if ( addition.startsBeforeDisjoint(r) ) {
130				// insert before r
131				iter.previous();
132				iter.add(addition);
133				return;
134			}
135			// if disjoint and after r, a future iteration will handle it
136		}
137		// ok, must be after last interval (and disjoint from last interval)
138		// just add it
139		intervals.add(addition);
140	}
141
142	/*
143	protected void add(Interval addition) {
144        //System.out.println("add "+addition+" to "+intervals.toString());
145        if ( addition.b<addition.a ) {
146            return;
147        }
148        // find position in list
149        //for (ListIterator iter = intervals.listIterator(); iter.hasNext();) {
150		int n = intervals.size();
151		for (int i=0; i<n; i++) {
152			Interval r = (Interval)intervals.get(i);
153            if ( addition.equals(r) ) {
154                return;
155            }
156            if ( addition.adjacent(r) || !addition.disjoint(r) ) {
157                // next to each other, make a single larger interval
158                Interval bigger = addition.union(r);
159				intervals.set(i, bigger);
160                // make sure we didn't just create an interval that
161                // should be merged with next interval in list
162				if ( (i+1)<n ) {
163					i++;
164					Interval next = (Interval)intervals.get(i);
165                    if ( bigger.adjacent(next)||!bigger.disjoint(next) ) {
166                        // if we bump up against or overlap next, merge
167						intervals.remove(i); // remove next one
168						i--;
169						intervals.set(i, bigger.union(next)); // set to 3 merged ones
170                    }
171                }
172                return;
173            }
174            if ( addition.startsBeforeDisjoint(r) ) {
175                // insert before r
176				intervals.add(i, addition);
177                return;
178            }
179            // if disjoint and after r, a future iteration will handle it
180        }
181        // ok, must be after last interval (and disjoint from last interval)
182        // just add it
183        intervals.add(addition);
184    }
185*/
186
187	public void addAll(IntSet set) {
188		if ( set==null ) {
189			return;
190		}
191        if ( !(set instanceof IntervalSet) ) {
192            throw new IllegalArgumentException("can't add non IntSet ("+
193											   set.getClass().getName()+
194											   ") to IntervalSet");
195        }
196        IntervalSet other = (IntervalSet)set;
197        // walk set and add each interval
198		int n = other.intervals.size();
199		for (int i = 0; i < n; i++) {
200			Interval I = (Interval) other.intervals.get(i);
201			this.add(I.a,I.b);
202		}
203    }
204
205    public IntSet complement(int minElement, int maxElement) {
206        return this.complement(IntervalSet.of(minElement,maxElement));
207    }
208
209    /** Given the set of possible values (rather than, say UNICODE or MAXINT),
210     *  return a new set containing all elements in vocabulary, but not in
211     *  this.  The computation is (vocabulary - this).
212     *
213     *  'this' is assumed to be either a subset or equal to vocabulary.
214     */
215    public IntSet complement(IntSet vocabulary) {
216        if ( vocabulary==null ) {
217            return null; // nothing in common with null set
218        }
219		if ( !(vocabulary instanceof IntervalSet ) ) {
220			throw new IllegalArgumentException("can't complement with non IntervalSet ("+
221											   vocabulary.getClass().getName()+")");
222		}
223		IntervalSet vocabularyIS = ((IntervalSet)vocabulary);
224		int maxElement = vocabularyIS.getMaxElement();
225
226		IntervalSet compl = new IntervalSet();
227		int n = intervals.size();
228		if ( n ==0 ) {
229			return compl;
230		}
231		Interval first = (Interval)intervals.get(0);
232		// add a range from 0 to first.a constrained to vocab
233		if ( first.a > 0 ) {
234			IntervalSet s = IntervalSet.of(0, first.a-1);
235			IntervalSet a = (IntervalSet)s.and(vocabularyIS);
236			compl.addAll(a);
237		}
238		for (int i=1; i<n; i++) { // from 2nd interval .. nth
239			Interval previous = (Interval)intervals.get(i-1);
240			Interval current = (Interval)intervals.get(i);
241			IntervalSet s = IntervalSet.of(previous.b+1, current.a-1);
242			IntervalSet a = (IntervalSet)s.and(vocabularyIS);
243			compl.addAll(a);
244		}
245		Interval last = (Interval)intervals.get(n -1);
246		// add a range from last.b to maxElement constrained to vocab
247		if ( last.b < maxElement ) {
248			IntervalSet s = IntervalSet.of(last.b+1, maxElement);
249			IntervalSet a = (IntervalSet)s.and(vocabularyIS);
250			compl.addAll(a);
251		}
252		return compl;
253    }
254
255	/** Compute this-other via this&~other.
256	 *  Return a new set containing all elements in this but not in other.
257	 *  other is assumed to be a subset of this;
258     *  anything that is in other but not in this will be ignored.
259	 */
260	public IntSet subtract(IntSet other) {
261		// assume the whole unicode range here for the complement
262		// because it doesn't matter.  Anything beyond the max of this' set
263		// will be ignored since we are doing this & ~other.  The intersection
264		// will be empty.  The only problem would be when this' set max value
265		// goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE
266		// will prevent this.
267		return this.and(((IntervalSet)other).complement(COMPLETE_SET));
268	}
269
270	/** return a new set containing all elements in this but not in other.
271     *  Intervals may have to be broken up when ranges in this overlap
272     *  with ranges in other.  other is assumed to be a subset of this;
273     *  anything that is in other but not in this will be ignored.
274	 *
275	 *  Keep around, but 10-20-2005, I decided to make complement work w/o
276	 *  subtract and so then subtract can simply be a&~b
277	 *
278    public IntSet subtract(IntSet other) {
279        if ( other==null || !(other instanceof IntervalSet) ) {
280            return null; // nothing in common with null set
281        }
282
283        IntervalSet diff = new IntervalSet();
284
285        // iterate down both interval lists
286        ListIterator thisIter = this.intervals.listIterator();
287        ListIterator otherIter = ((IntervalSet)other).intervals.listIterator();
288        Interval mine=null;
289        Interval theirs=null;
290        if ( thisIter.hasNext() ) {
291            mine = (Interval)thisIter.next();
292        }
293        if ( otherIter.hasNext() ) {
294            theirs = (Interval)otherIter.next();
295        }
296        while ( mine!=null ) {
297            //System.out.println("mine="+mine+", theirs="+theirs);
298            // CASE 1: nothing in theirs removes a chunk from mine
299            if ( theirs==null || mine.disjoint(theirs) ) {
300                // SUBCASE 1a: finished traversing theirs; keep adding mine now
301                if ( theirs==null ) {
302                    // add everything in mine to difference since theirs done
303                    diff.add(mine);
304                    mine = null;
305                    if ( thisIter.hasNext() ) {
306                        mine = (Interval)thisIter.next();
307                    }
308                }
309                else {
310                    // SUBCASE 1b: mine is completely to the left of theirs
311                    // so we can add to difference; move mine, but not theirs
312                    if ( mine.startsBeforeDisjoint(theirs) ) {
313                        diff.add(mine);
314                        mine = null;
315                        if ( thisIter.hasNext() ) {
316                            mine = (Interval)thisIter.next();
317                        }
318                    }
319                    // SUBCASE 1c: theirs is completely to the left of mine
320                    else {
321                        // keep looking in theirs
322                        theirs = null;
323                        if ( otherIter.hasNext() ) {
324                            theirs = (Interval)otherIter.next();
325                        }
326                    }
327                }
328            }
329            else {
330                // CASE 2: theirs breaks mine into two chunks
331                if ( mine.properlyContains(theirs) ) {
332                    // must add two intervals: stuff to left and stuff to right
333                    diff.add(mine.a, theirs.a-1);
334                    // don't actually add stuff to right yet as next 'theirs'
335                    // might overlap with it
336                    // The stuff to the right might overlap with next "theirs".
337                    // so it is considered next
338                    Interval right = new Interval(theirs.b+1, mine.b);
339                    mine = right;
340                    // move theirs forward
341                    theirs = null;
342                    if ( otherIter.hasNext() ) {
343                        theirs = (Interval)otherIter.next();
344                    }
345                }
346
347                // CASE 3: theirs covers mine; nothing to add to diff
348                else if ( theirs.properlyContains(mine) ) {
349                    // nothing to add, theirs forces removal totally of mine
350                    // just move mine looking for an overlapping interval
351                    mine = null;
352                    if ( thisIter.hasNext() ) {
353                        mine = (Interval)thisIter.next();
354                    }
355                }
356
357                // CASE 4: non proper overlap
358                else {
359                    // overlap, but not properly contained
360                    diff.add(mine.differenceNotProperlyContained(theirs));
361                    // update iterators
362                    boolean moveTheirs = true;
363                    if ( mine.startsBeforeNonDisjoint(theirs) ||
364                         theirs.b > mine.b )
365                    {
366                        // uh oh, right of theirs extends past right of mine
367                        // therefore could overlap with next of mine so don't
368                        // move theirs iterator yet
369                        moveTheirs = false;
370                    }
371                    // always move mine
372                    mine = null;
373                    if ( thisIter.hasNext() ) {
374                        mine = (Interval)thisIter.next();
375                    }
376                    if ( moveTheirs ) {
377                        theirs = null;
378                        if ( otherIter.hasNext() ) {
379                            theirs = (Interval)otherIter.next();
380                        }
381                    }
382                }
383            }
384        }
385        return diff;
386    }
387	 */
388
389    /** TODO: implement this! */
390	public IntSet or(IntSet a) {
391		IntervalSet o = new IntervalSet();
392		o.addAll(this);
393		o.addAll(a);
394		//throw new NoSuchMethodError();
395		return o;
396	}
397
398    /** Return a new set with the intersection of this set with other.  Because
399     *  the intervals are sorted, we can use an iterator for each list and
400     *  just walk them together.  This is roughly O(min(n,m)) for interval
401     *  list lengths n and m.
402     */
403	public IntSet and(IntSet other) {
404		if ( other==null ) { //|| !(other instanceof IntervalSet) ) {
405			return null; // nothing in common with null set
406		}
407
408		ArrayList myIntervals = (ArrayList)this.intervals;
409		ArrayList theirIntervals = (ArrayList)((IntervalSet)other).intervals;
410		IntervalSet intersection = null;
411		int mySize = myIntervals.size();
412		int theirSize = theirIntervals.size();
413		int i = 0;
414		int j = 0;
415		// iterate down both interval lists looking for nondisjoint intervals
416		while ( i<mySize && j<theirSize ) {
417			Interval mine = (Interval)myIntervals.get(i);
418			Interval theirs = (Interval)theirIntervals.get(j);
419			//System.out.println("mine="+mine+" and theirs="+theirs);
420			if ( mine.startsBeforeDisjoint(theirs) ) {
421				// move this iterator looking for interval that might overlap
422				i++;
423			}
424			else if ( theirs.startsBeforeDisjoint(mine) ) {
425				// move other iterator looking for interval that might overlap
426				j++;
427			}
428			else if ( mine.properlyContains(theirs) ) {
429				// overlap, add intersection, get next theirs
430				if ( intersection==null ) {
431					intersection = new IntervalSet();
432				}
433				intersection.add(mine.intersection(theirs));
434				j++;
435			}
436			else if ( theirs.properlyContains(mine) ) {
437				// overlap, add intersection, get next mine
438				if ( intersection==null ) {
439					intersection = new IntervalSet();
440				}
441				intersection.add(mine.intersection(theirs));
442				i++;
443			}
444			else if ( !mine.disjoint(theirs) ) {
445				// overlap, add intersection
446				if ( intersection==null ) {
447					intersection = new IntervalSet();
448				}
449				intersection.add(mine.intersection(theirs));
450				// Move the iterator of lower range [a..b], but not
451				// the upper range as it may contain elements that will collide
452				// with the next iterator. So, if mine=[0..115] and
453				// theirs=[115..200], then intersection is 115 and move mine
454				// but not theirs as theirs may collide with the next range
455				// in thisIter.
456				// move both iterators to next ranges
457				if ( mine.startsAfterNonDisjoint(theirs) ) {
458					j++;
459				}
460				else if ( theirs.startsAfterNonDisjoint(mine) ) {
461					i++;
462				}
463			}
464		}
465		if ( intersection==null ) {
466			return new IntervalSet();
467		}
468		return intersection;
469	}
470
471    /** Is el in any range of this set? */
472    public boolean member(int el) {
473		int n = intervals.size();
474		for (int i = 0; i < n; i++) {
475			Interval I = (Interval) intervals.get(i);
476			int a = I.a;
477			int b = I.b;
478			if ( el<a ) {
479				break; // list is sorted and el is before this interval; not here
480			}
481			if ( el>=a && el<=b ) {
482				return true; // found in this interval
483			}
484		}
485		return false;
486/*
487		for (ListIterator iter = intervals.listIterator(); iter.hasNext();) {
488            Interval I = (Interval) iter.next();
489            if ( el<I.a ) {
490                break; // list is sorted and el is before this interval; not here
491            }
492            if ( el>=I.a && el<=I.b ) {
493                return true; // found in this interval
494            }
495        }
496        return false;
497        */
498    }
499
500    /** return true if this set has no members */
501    public boolean isNil() {
502        return intervals==null || intervals.size()==0;
503    }
504
505    /** If this set is a single integer, return it otherwise Label.INVALID */
506    public int getSingleElement() {
507        if ( intervals!=null && intervals.size()==1 ) {
508            Interval I = (Interval)intervals.get(0);
509            if ( I.a == I.b ) {
510                return I.a;
511            }
512        }
513        return Label.INVALID;
514    }
515
516	public int getMaxElement() {
517		if ( isNil() ) {
518			return Label.INVALID;
519		}
520		Interval last = (Interval)intervals.get(intervals.size()-1);
521		return last.b;
522	}
523
524	/** Return minimum element >= 0 */
525	public int getMinElement() {
526		if ( isNil() ) {
527			return Label.INVALID;
528		}
529		int n = intervals.size();
530		for (int i = 0; i < n; i++) {
531			Interval I = (Interval) intervals.get(i);
532			int a = I.a;
533			int b = I.b;
534			for (int v=a; v<=b; v++) {
535				if ( v>=0 ) return v;
536			}
537		}
538		return Label.INVALID;
539	}
540
541    /** Return a list of Interval objects. */
542    public List<Interval> getIntervals() {
543        return intervals;
544    }
545
546    /** Are two IntervalSets equal?  Because all intervals are sorted
547     *  and disjoint, equals is a simple linear walk over both lists
548     *  to make sure they are the same.  Interval.equals() is used
549     *  by the List.equals() method to check the ranges.
550     */
551    public boolean equals(Object obj) {
552        if ( obj==null || !(obj instanceof IntervalSet) ) {
553            return false;
554        }
555        IntervalSet other = (IntervalSet)obj;
556        return this.intervals.equals(other.intervals);
557    }
558
559    public String toString() {
560        return toString(null);
561    }
562
563    public String toString(Grammar g) {
564        StringBuffer buf = new StringBuffer();
565		if ( this.intervals==null || this.intervals.size()==0 ) {
566			return "{}";
567		}
568        if ( this.intervals.size()>1 ) {
569            buf.append("{");
570        }
571        Iterator iter = this.intervals.iterator();
572        while (iter.hasNext()) {
573            Interval I = (Interval) iter.next();
574            int a = I.a;
575            int b = I.b;
576            if ( a==b ) {
577                if ( g!=null ) {
578                    buf.append(g.getTokenDisplayName(a));
579                }
580                else {
581                    buf.append(a);
582                }
583            }
584            else {
585                if ( g!=null ) {
586                    buf.append(g.getTokenDisplayName(a)+".."+g.getTokenDisplayName(b));
587                }
588                else {
589                    buf.append(a+".."+b);
590                }
591            }
592            if ( iter.hasNext() ) {
593                buf.append(", ");
594            }
595        }
596        if ( this.intervals.size()>1 ) {
597            buf.append("}");
598        }
599        return buf.toString();
600    }
601
602    public int size() {
603		int n = 0;
604		int numIntervals = intervals.size();
605		if ( numIntervals==1 ) {
606			Interval firstInterval = this.intervals.get(0);
607			return firstInterval.b-firstInterval.a+1;
608		}
609		for (int i = 0; i < numIntervals; i++) {
610			Interval I = (Interval) intervals.get(i);
611			n += (I.b-I.a+1);
612		}
613		return n;
614    }
615
616    public List toList() {
617		List values = new ArrayList();
618		int n = intervals.size();
619		for (int i = 0; i < n; i++) {
620			Interval I = (Interval) intervals.get(i);
621			int a = I.a;
622			int b = I.b;
623			for (int v=a; v<=b; v++) {
624				values.add(Utils.integer(v));
625			}
626		}
627		return values;
628    }
629
630	/** Get the ith element of ordered set.  Used only by RandomPhrase so
631	 *  don't bother to implement if you're not doing that for a new
632	 *  ANTLR code gen target.
633	 */
634	public int get(int i) {
635		int n = intervals.size();
636		int index = 0;
637		for (int j = 0; j < n; j++) {
638			Interval I = (Interval) intervals.get(j);
639			int a = I.a;
640			int b = I.b;
641			for (int v=a; v<=b; v++) {
642				if ( index==i ) {
643					return v;
644				}
645				index++;
646			}
647		}
648		return -1;
649	}
650
651	public int[] toArray() {
652		int[] values = new int[size()];
653		int n = intervals.size();
654		int j = 0;
655		for (int i = 0; i < n; i++) {
656			Interval I = (Interval) intervals.get(i);
657			int a = I.a;
658			int b = I.b;
659			for (int v=a; v<=b; v++) {
660				values[j] = v;
661				j++;
662			}
663		}
664		return values;
665	}
666
667	public org.antlr.runtime.BitSet toRuntimeBitSet() {
668		org.antlr.runtime.BitSet s =
669			new org.antlr.runtime.BitSet(getMaxElement()+1);
670		int n = intervals.size();
671		for (int i = 0; i < n; i++) {
672			Interval I = (Interval) intervals.get(i);
673			int a = I.a;
674			int b = I.b;
675			for (int v=a; v<=b; v++) {
676				s.add(v);
677			}
678		}
679		return s;
680	}
681
682	public void remove(int el) {
683        throw new NoSuchMethodError("IntervalSet.remove() unimplemented");
684    }
685
686	/*
687	protected void finalize() throws Throwable {
688		super.finalize();
689		System.out.println("size "+intervals.size()+" "+size());
690	}
691	*/
692}
693