1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// interval-set.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to represent and operate on sets of intervals.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_INTERVAL_SET_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_INTERVAL_SET_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iostream>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Stores and operates on a set of half-open integral intervals [a,b)
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// of signed integers of type T.
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass IntervalSet {
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  struct Interval {
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    T begin;
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    T end;
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Interval() : begin(-1), end(-1) {}
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Interval(T b, T e) : begin(b), end(e) {}
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool operator<(const Interval &i) const {
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return begin < i.begin || (begin == i.begin && end > i.end);
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool operator==(const Interval &i) const {
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return begin == i.begin && end == i.end;
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool operator!=(const Interval &i) const {
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return begin != i.begin || end != i.end;
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    istream &Read(istream &strm) {
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      T n;
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ReadType(strm, &n);
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      begin = n;
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ReadType(strm, &n);
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      end = n;
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return strm;
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ostream &Write(ostream &strm) const {
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      T n = begin;
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      WriteType(strm, n);
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      n = end;
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      WriteType(strm, n);
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return strm;
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  IntervalSet() : count_(-1) {}
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Returns the interval set as a vector.
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Interval> *Intervals() { return &intervals_; }
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<Interval> *Intervals() const { return &intervals_; }
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  bool Empty() const { return intervals_.empty(); }
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  T Size() const { return intervals_.size(); }
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Number of points in the intervals (undefined if not normalized).
895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  T Count() const { return count_; }
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Clear() {
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    intervals_.clear();
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    count_ = 0;
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Adds an interval set to the set. The result may not be normalized.
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Union(const IntervalSet<T> &iset) {
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const vector<Interval> *intervals = iset.Intervals();
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (typename vector<Interval>::const_iterator it = intervals->begin();
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         it != intervals->end(); ++it)
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      intervals_.push_back(*it);
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Requires intervals be normalized.
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Member(T value) const {
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Interval interval(value, value);
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename vector<Interval>::const_iterator lb =
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        lower_bound(intervals_.begin(), intervals_.end(), interval);
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (lb == intervals_.begin())
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return (--lb)->end > value;
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Requires intervals be normalized.
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator==(const IntervalSet<T>& iset) const {
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return *(iset.Intervals()) == intervals_;
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Requires intervals be normalized.
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator!=(const IntervalSet<T>& iset) const {
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return *(iset.Intervals()) != intervals_;
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Singleton() const {
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return intervals_.size() == 1 &&
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        intervals_[0].begin + 1 == intervals_[0].end;
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Sorts; collapses overlapping and adjacent interals; sets count.
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Normalize();
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Intersects an interval set with the set. Requires intervals be
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // normalized. The result is normalized.
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Complements the set w.r.t [0, maxval). Requires intervals be
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // normalized. The result is normalized.
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Complement(T maxval, IntervalSet<T> *oset) const;
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Subtract an interval set from the set. Requires intervals be
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // normalized. The result is normalized.
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Determines if an interval set overlaps with the set. Requires
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // intervals be normalized.
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Overlaps(const IntervalSet<T> &iset) const;
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Determines if an interval set overlaps with the set but neither
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // is contained in the other. Requires intervals be normalized.
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool StrictlyOverlaps(const IntervalSet<T> &iset) const;
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Determines if an interval set is contained within the set. Requires
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // intervals be normalized.
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Contains(const IntervalSet<T> &iset) const;
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  istream &Read(istream &strm) {
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ReadType(strm, &intervals_);
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return ReadType(strm, &count_);
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ostream &Write(ostream &strm) const {
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteType(strm, intervals_);
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return WriteType(strm, count_);
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Interval> intervals_;
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  T count_;
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Sorts; collapses overlapping and adjacent interavls; sets count.
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid IntervalSet<T>::Normalize() {
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  sort(intervals_.begin(), intervals_.end());
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  count_ = 0;
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  T size = 0;
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (T i = 0; i < intervals_.size(); ++i) {
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Interval &inti = intervals_[i];
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (inti.begin == inti.end)
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      continue;
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (T j = i + 1; j < intervals_.size(); ++j) {
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Interval &intj = intervals_[j];
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (intj.begin > inti.end)
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        break;
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (intj.end > inti.end)
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        inti.end = intj.end;
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++i;
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    count_ += inti.end - inti.begin;
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    intervals_[size++] = inti;
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  intervals_.resize(size);
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Intersects an interval set with the set. Requires intervals be normalized.
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The result is normalized.
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               IntervalSet<T> *oset) const {
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<Interval> *iintervals = iset.Intervals();
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Interval> *ointervals = oset->Intervals();
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename vector<Interval>::const_iterator it1 = intervals_.begin();
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename vector<Interval>::const_iterator it2 = iintervals->begin();
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ointervals->clear();
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  oset->count_ = 0;
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  while (it1 != intervals_.end() && it2 != iintervals->end()) {
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (it1->end <= it2->begin) {
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it1;
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (it2->end <= it1->begin) {
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it2;
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Interval interval;
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      interval.begin = max(it1->begin, it2->begin);
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      interval.end = min(it1->end, it2->end);
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ointervals->push_back(interval);
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      oset->count_ += interval.end - interval.begin;
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (it1->end < it2->end)
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++it1;
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      else
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++it2;
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complements the set w.r.t [0, maxval). Requires intervals be normalized.
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The result is normalized.
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Interval> *ointervals = oset->Intervals();
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ointervals->clear();
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  oset->count_ = 0;
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Interval interval;
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  interval.begin = 0;
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (typename vector<Interval>::const_iterator it = intervals_.begin();
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       it != intervals_.end();
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       ++it) {
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    interval.end = min(it->begin, maxval);
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (interval.begin < interval.end) {
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ointervals->push_back(interval);
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      oset->count_ += interval.end - interval.begin;
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    interval.begin = it->end;
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  interval.end = maxval;
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (interval.begin < interval.end) {
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ointervals->push_back(interval);
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    oset->count_ += interval.end - interval.begin;
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Subtract an interval set from the set. Requires intervals be normalized.
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The result is normalized.
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid IntervalSet<T>::Difference(const IntervalSet<T> &iset,
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                IntervalSet<T> *oset) const {
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (intervals_.empty()) {
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    oset->Intervals()->clear();
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    oset->count_ = 0;
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    IntervalSet<T> cset;
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    iset.Complement(intervals_.back().end, &cset);
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Intersect(cset, oset);
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Determines if an interval set overlaps with the set. Requires
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// intervals be normalized.
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<Interval> *intervals = iset.Intervals();
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename vector<Interval>::const_iterator it1 = intervals_.begin();
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename vector<Interval>::const_iterator it2 = intervals->begin();
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  while (it1 != intervals_.end() && it2 != intervals->end()) {
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (it1->end <= it2->begin) {
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it1;
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (it2->end <= it1->begin) {
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it2;
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return true;
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return false;
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Determines if an interval set overlaps with the set but neither
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// is contained in the other. Requires intervals be normalized.
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<Interval> *intervals = iset.Intervals();
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename vector<Interval>::const_iterator it1 = intervals_.begin();
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename vector<Interval>::const_iterator it2 = intervals->begin();
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool only1 = false;   // point in intervals_ but not intervals
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool only2 = false;   // point in intervals but not intervals_
300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool overlap = false; // point in both intervals_ and intervals
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  while (it1 != intervals_.end() && it2 != intervals->end()) {
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (it1->end <= it2->begin) {  // no overlap - it1 first
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      only1 = true;
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it1;
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (it2->end <= it1->begin) {  // no overlap - it2 first
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      only2 = true;
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it2;
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (it2->begin == it1->begin && it2->end == it1->end) {  // equals
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      overlap = true;
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it1;
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it2;
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (it2->begin <= it1->begin && it2->end >= it1->end) {  // 1 c 2
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      only2 = true;
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      overlap = true;
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it1;
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (it1->begin <= it2->begin && it1->end >= it2->end) {  // 2 c 1
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      only1 = true;
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      overlap = true;
320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it2;
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {  // strict overlap
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      only1 = true;
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      only2 = true;
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      overlap = true;
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (only1 == true && only2 == true && overlap == true)
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return true;
328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (it1 != intervals_.end())
330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    only1 = true;
331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (it2 != intervals->end())
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    only2 = true;
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return only1 == true && only2 == true && overlap == true;
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Determines if an interval set is contained within the set. Requires
338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// intervals be normalized.
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (iset.Count() > Count())
342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<Interval> *intervals = iset.Intervals();
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename vector<Interval>::const_iterator it1 = intervals_.begin();
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename vector<Interval>::const_iterator it2 = intervals->begin();
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  while (it1 != intervals_.end() && it2 != intervals->end()) {
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (it1->end <= it2->begin) {  // no overlap - it1 first
350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it1;
351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (it2->begin < it1->begin || it2->end > it1->end) {  // no C
352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (it2->end == it1->end) {
354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it1;
355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it2;
356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++it2;
358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return it2 == intervals->end();
361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T>
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonostream &operator<<(ostream &strm, const IntervalSet<T> &s)  {
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename IntervalSet<T>::Interval Interval;
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<Interval> *intervals = s.Intervals();
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  strm << "{";
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (typename vector<Interval>::const_iterator it = intervals->begin();
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       it != intervals->end();
370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       ++it) {
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (it != intervals->begin())
372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      strm << ",";
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    strm << "[" << it->begin << "," << it->end << ")";
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  strm << "}";
376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return strm;
377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_INTERVAL_SET_H__
382