1// interval-set.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Class to represent and operate on sets of intervals.
20
21#ifndef FST_LIB_INTERVAL_SET_H__
22#define FST_LIB_INTERVAL_SET_H__
23
24#include <iostream>
25#include <vector>
26using std::vector;
27
28
29#include <fst/util.h>
30
31
32namespace fst {
33
34// Stores and operates on a set of half-open integral intervals [a,b)
35// of signed integers of type T.
36template <typename T>
37class IntervalSet {
38 public:
39  struct Interval {
40    T begin;
41    T end;
42
43    Interval() : begin(-1), end(-1) {}
44
45    Interval(T b, T e) : begin(b), end(e) {}
46
47    bool operator<(const Interval &i) const {
48      return begin < i.begin || (begin == i.begin && end > i.end);
49    }
50
51    bool operator==(const Interval &i) const {
52      return begin == i.begin && end == i.end;
53    }
54
55    bool operator!=(const Interval &i) const {
56      return begin != i.begin || end != i.end;
57    }
58
59    istream &Read(istream &strm) {
60      T n;
61      ReadType(strm, &n);
62      begin = n;
63      ReadType(strm, &n);
64      end = n;
65      return strm;
66    }
67
68    ostream &Write(ostream &strm) const {
69      T n = begin;
70      WriteType(strm, n);
71      n = end;
72      WriteType(strm, n);
73      return strm;
74    }
75  };
76
77  IntervalSet() : count_(-1) {}
78
79  // Returns the interval set as a vector.
80  vector<Interval> *Intervals() { return &intervals_; }
81
82  const vector<Interval> *Intervals() const { return &intervals_; }
83
84  bool Empty() const { return intervals_.empty(); }
85
86  T Size() const { return intervals_.size(); }
87
88  // Number of points in the intervals (undefined if not normalized).
89  T Count() const { return count_; }
90
91  void Clear() {
92    intervals_.clear();
93    count_ = 0;
94  }
95
96  // Adds an interval set to the set. The result may not be normalized.
97  void Union(const IntervalSet<T> &iset) {
98    const vector<Interval> *intervals = iset.Intervals();
99    for (typename vector<Interval>::const_iterator it = intervals->begin();
100         it != intervals->end(); ++it)
101      intervals_.push_back(*it);
102  }
103
104  // Requires intervals be normalized.
105  bool Member(T value) const {
106    Interval interval(value, value);
107    typename vector<Interval>::const_iterator lb =
108        lower_bound(intervals_.begin(), intervals_.end(), interval);
109    if (lb == intervals_.begin())
110      return false;
111    return (--lb)->end > value;
112  }
113
114  // Requires intervals be normalized.
115  bool operator==(const IntervalSet<T>& iset) const {
116    return *(iset.Intervals()) == intervals_;
117  }
118
119  // Requires intervals be normalized.
120  bool operator!=(const IntervalSet<T>& iset) const {
121    return *(iset.Intervals()) != intervals_;
122  }
123
124  bool Singleton() const {
125    return intervals_.size() == 1 &&
126        intervals_[0].begin + 1 == intervals_[0].end;
127  }
128
129
130  // Sorts; collapses overlapping and adjacent interals; sets count.
131  void Normalize();
132
133  // Intersects an interval set with the set. Requires intervals be
134  // normalized. The result is normalized.
135  void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
136
137  // Complements the set w.r.t [0, maxval). Requires intervals be
138  // normalized. The result is normalized.
139  void Complement(T maxval, IntervalSet<T> *oset) const;
140
141  // Subtract an interval set from the set. Requires intervals be
142  // normalized. The result is normalized.
143  void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
144
145  // Determines if an interval set overlaps with the set. Requires
146  // intervals be normalized.
147  bool Overlaps(const IntervalSet<T> &iset) const;
148
149  // Determines if an interval set overlaps with the set but neither
150  // is contained in the other. Requires intervals be normalized.
151  bool StrictlyOverlaps(const IntervalSet<T> &iset) const;
152
153  // Determines if an interval set is contained within the set. Requires
154  // intervals be normalized.
155  bool Contains(const IntervalSet<T> &iset) const;
156
157  istream &Read(istream &strm) {
158    ReadType(strm, &intervals_);
159    return ReadType(strm, &count_);
160  }
161
162  ostream &Write(ostream &strm) const {
163    WriteType(strm, intervals_);
164    return WriteType(strm, count_);
165  }
166
167 private:
168  vector<Interval> intervals_;
169  T count_;
170};
171
172// Sorts; collapses overlapping and adjacent interavls; sets count.
173template <typename T>
174void IntervalSet<T>::Normalize() {
175  sort(intervals_.begin(), intervals_.end());
176
177  count_ = 0;
178  T size = 0;
179  for (T i = 0; i < intervals_.size(); ++i) {
180    Interval &inti = intervals_[i];
181    if (inti.begin == inti.end)
182      continue;
183    for (T j = i + 1; j < intervals_.size(); ++j) {
184      Interval &intj = intervals_[j];
185      if (intj.begin > inti.end)
186        break;
187      if (intj.end > inti.end)
188        inti.end = intj.end;
189      ++i;
190    }
191    count_ += inti.end - inti.begin;
192    intervals_[size++] = inti;
193  }
194  intervals_.resize(size);
195}
196
197// Intersects an interval set with the set. Requires intervals be normalized.
198// The result is normalized.
199template <typename T>
200void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
201                               IntervalSet<T> *oset) const {
202  const vector<Interval> *iintervals = iset.Intervals();
203  vector<Interval> *ointervals = oset->Intervals();
204  typename vector<Interval>::const_iterator it1 = intervals_.begin();
205  typename vector<Interval>::const_iterator it2 = iintervals->begin();
206
207  ointervals->clear();
208  oset->count_ = 0;
209
210  while (it1 != intervals_.end() && it2 != iintervals->end()) {
211    if (it1->end <= it2->begin) {
212      ++it1;
213    } else if (it2->end <= it1->begin) {
214      ++it2;
215    } else {
216      Interval interval;
217      interval.begin = max(it1->begin, it2->begin);
218      interval.end = min(it1->end, it2->end);
219      ointervals->push_back(interval);
220      oset->count_ += interval.end - interval.begin;
221      if (it1->end < it2->end)
222        ++it1;
223      else
224        ++it2;
225    }
226  }
227}
228
229// Complements the set w.r.t [0, maxval). Requires intervals be normalized.
230// The result is normalized.
231template <typename T>
232void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
233  vector<Interval> *ointervals = oset->Intervals();
234  ointervals->clear();
235  oset->count_ = 0;
236
237  Interval interval;
238  interval.begin = 0;
239  for (typename vector<Interval>::const_iterator it = intervals_.begin();
240       it != intervals_.end();
241       ++it) {
242    interval.end = min(it->begin, maxval);
243    if (interval.begin < interval.end) {
244      ointervals->push_back(interval);
245      oset->count_ += interval.end - interval.begin;
246    }
247    interval.begin = it->end;
248  }
249  interval.end = maxval;
250  if (interval.begin < interval.end) {
251    ointervals->push_back(interval);
252    oset->count_ += interval.end - interval.begin;
253  }
254}
255
256// Subtract an interval set from the set. Requires intervals be normalized.
257// The result is normalized.
258template <typename T>
259void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
260                                IntervalSet<T> *oset) const {
261  if (intervals_.empty()) {
262    oset->Intervals()->clear();
263    oset->count_ = 0;
264  } else {
265    IntervalSet<T> cset;
266    iset.Complement(intervals_.back().end, &cset);
267    Intersect(cset, oset);
268  }
269}
270
271// Determines if an interval set overlaps with the set. Requires
272// intervals be normalized.
273template <typename T>
274bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
275  const vector<Interval> *intervals = iset.Intervals();
276  typename vector<Interval>::const_iterator it1 = intervals_.begin();
277  typename vector<Interval>::const_iterator it2 = intervals->begin();
278
279  while (it1 != intervals_.end() && it2 != intervals->end()) {
280    if (it1->end <= it2->begin) {
281      ++it1;
282    } else if (it2->end <= it1->begin) {
283      ++it2;
284    } else {
285      return true;
286    }
287  }
288  return false;
289}
290
291// Determines if an interval set overlaps with the set but neither
292// is contained in the other. Requires intervals be normalized.
293template <typename T>
294bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
295  const vector<Interval> *intervals = iset.Intervals();
296  typename vector<Interval>::const_iterator it1 = intervals_.begin();
297  typename vector<Interval>::const_iterator it2 = intervals->begin();
298  bool only1 = false;   // point in intervals_ but not intervals
299  bool only2 = false;   // point in intervals but not intervals_
300  bool overlap = false; // point in both intervals_ and intervals
301
302  while (it1 != intervals_.end() && it2 != intervals->end()) {
303    if (it1->end <= it2->begin) {  // no overlap - it1 first
304      only1 = true;
305      ++it1;
306    } else if (it2->end <= it1->begin) {  // no overlap - it2 first
307      only2 = true;
308      ++it2;
309    } else if (it2->begin == it1->begin && it2->end == it1->end) {  // equals
310      overlap = true;
311      ++it1;
312      ++it2;
313    } else if (it2->begin <= it1->begin && it2->end >= it1->end) {  // 1 c 2
314      only2 = true;
315      overlap = true;
316      ++it1;
317    } else if (it1->begin <= it2->begin && it1->end >= it2->end) {  // 2 c 1
318      only1 = true;
319      overlap = true;
320      ++it2;
321    } else {  // strict overlap
322      only1 = true;
323      only2 = true;
324      overlap = true;
325    }
326    if (only1 == true && only2 == true && overlap == true)
327      return true;
328  }
329  if (it1 != intervals_.end())
330    only1 = true;
331  if (it2 != intervals->end())
332    only2 = true;
333
334  return only1 == true && only2 == true && overlap == true;
335}
336
337// Determines if an interval set is contained within the set. Requires
338// intervals be normalized.
339template <typename T>
340bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
341  if (iset.Count() > Count())
342    return false;
343
344  const vector<Interval> *intervals = iset.Intervals();
345  typename vector<Interval>::const_iterator it1 = intervals_.begin();
346  typename vector<Interval>::const_iterator it2 = intervals->begin();
347
348  while (it1 != intervals_.end() && it2 != intervals->end()) {
349    if (it1->end <= it2->begin) {  // no overlap - it1 first
350      ++it1;
351    } else if (it2->begin < it1->begin || it2->end > it1->end) {  // no C
352      return false;
353    } else if (it2->end == it1->end) {
354      ++it1;
355      ++it2;
356    } else {
357      ++it2;
358    }
359  }
360  return it2 == intervals->end();
361}
362
363template <typename T>
364ostream &operator<<(ostream &strm, const IntervalSet<T> &s)  {
365  typedef typename IntervalSet<T>::Interval Interval;
366  const vector<Interval> *intervals = s.Intervals();
367  strm << "{";
368  for (typename vector<Interval>::const_iterator it = intervals->begin();
369       it != intervals->end();
370       ++it) {
371    if (it != intervals->begin())
372      strm << ",";
373    strm << "[" << it->begin << "," << it->end << ")";
374  }
375  strm << "}";
376  return strm;
377}
378
379}  // namespace fst
380
381#endif  // FST_LIB_INTERVAL_SET_H__
382