1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// interval-set.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to represent and operate on sets of intervals. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_INTERVAL_SET_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_INTERVAL_SET_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iostream> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Stores and operates on a set of half-open integral intervals [a,b) 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// of signed integers of type T. 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass IntervalSet { 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson struct Interval { 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T begin; 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T end; 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Interval() : begin(-1), end(-1) {} 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Interval(T b, T e) : begin(b), end(e) {} 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator<(const Interval &i) const { 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return begin < i.begin || (begin == i.begin && end > i.end); 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator==(const Interval &i) const { 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return begin == i.begin && end == i.end; 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator!=(const Interval &i) const { 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return begin != i.begin || end != i.end; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson istream &Read(istream &strm) { 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T n; 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(strm, &n); 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson begin = n; 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(strm, &n); 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson end = n; 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return strm; 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostream &Write(ostream &strm) const { 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T n = begin; 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(strm, n); 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson n = end; 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(strm, n); 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return strm; 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntervalSet() : count_(-1) {} 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Returns the interval set as a vector. 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Interval> *Intervals() { return &intervals_; } 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Interval> *Intervals() const { return &intervals_; } 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const bool Empty() const { return intervals_.empty(); } 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T Size() const { return intervals_.size(); } 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Number of points in the intervals (undefined if not normalized). 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T Count() const { return count_; } 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Clear() { 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson intervals_.clear(); 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson count_ = 0; 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Adds an interval set to the set. The result may not be normalized. 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Union(const IntervalSet<T> &iset) { 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Interval> *intervals = iset.Intervals(); 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename vector<Interval>::const_iterator it = intervals->begin(); 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson it != intervals->end(); ++it) 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson intervals_.push_back(*it); 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Requires intervals be normalized. 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Member(T value) const { 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Interval interval(value, value); 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename vector<Interval>::const_iterator lb = 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson lower_bound(intervals_.begin(), intervals_.end(), interval); 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (lb == intervals_.begin()) 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return (--lb)->end > value; 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Requires intervals be normalized. 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator==(const IntervalSet<T>& iset) const { 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *(iset.Intervals()) == intervals_; 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Requires intervals be normalized. 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator!=(const IntervalSet<T>& iset) const { 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *(iset.Intervals()) != intervals_; 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Singleton() const { 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return intervals_.size() == 1 && 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson intervals_[0].begin + 1 == intervals_[0].end; 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Sorts; collapses overlapping and adjacent interals; sets count. 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Normalize(); 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Intersects an interval set with the set. Requires intervals be 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // normalized. The result is normalized. 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const; 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Complements the set w.r.t [0, maxval). Requires intervals be 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // normalized. The result is normalized. 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Complement(T maxval, IntervalSet<T> *oset) const; 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Subtract an interval set from the set. Requires intervals be 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // normalized. The result is normalized. 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const; 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Determines if an interval set overlaps with the set. Requires 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // intervals be normalized. 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Overlaps(const IntervalSet<T> &iset) const; 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Determines if an interval set overlaps with the set but neither 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // is contained in the other. Requires intervals be normalized. 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool StrictlyOverlaps(const IntervalSet<T> &iset) const; 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Determines if an interval set is contained within the set. Requires 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // intervals be normalized. 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Contains(const IntervalSet<T> &iset) const; 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson istream &Read(istream &strm) { 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(strm, &intervals_); 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return ReadType(strm, &count_); 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostream &Write(ostream &strm) const { 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(strm, intervals_); 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return WriteType(strm, count_); 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Interval> intervals_; 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T count_; 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Sorts; collapses overlapping and adjacent interavls; sets count. 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid IntervalSet<T>::Normalize() { 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson sort(intervals_.begin(), intervals_.end()); 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson count_ = 0; 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T size = 0; 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (T i = 0; i < intervals_.size(); ++i) { 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Interval &inti = intervals_[i]; 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (inti.begin == inti.end) 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson continue; 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (T j = i + 1; j < intervals_.size(); ++j) { 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Interval &intj = intervals_[j]; 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (intj.begin > inti.end) 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson break; 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (intj.end > inti.end) 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson inti.end = intj.end; 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++i; 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson count_ += inti.end - inti.begin; 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson intervals_[size++] = inti; 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson intervals_.resize(size); 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Intersects an interval set with the set. Requires intervals be normalized. 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The result is normalized. 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid IntervalSet<T>::Intersect(const IntervalSet<T> &iset, 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntervalSet<T> *oset) const { 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Interval> *iintervals = iset.Intervals(); 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Interval> *ointervals = oset->Intervals(); 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename vector<Interval>::const_iterator it1 = intervals_.begin(); 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename vector<Interval>::const_iterator it2 = iintervals->begin(); 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ointervals->clear(); 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson oset->count_ = 0; 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (it1 != intervals_.end() && it2 != iintervals->end()) { 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it1->end <= it2->begin) { 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it1; 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (it2->end <= it1->begin) { 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it2; 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Interval interval; 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson interval.begin = max(it1->begin, it2->begin); 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson interval.end = min(it1->end, it2->end); 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ointervals->push_back(interval); 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson oset->count_ += interval.end - interval.begin; 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it1->end < it2->end) 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it1; 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it2; 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complements the set w.r.t [0, maxval). Requires intervals be normalized. 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The result is normalized. 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const { 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Interval> *ointervals = oset->Intervals(); 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ointervals->clear(); 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson oset->count_ = 0; 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Interval interval; 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson interval.begin = 0; 239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename vector<Interval>::const_iterator it = intervals_.begin(); 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson it != intervals_.end(); 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it) { 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson interval.end = min(it->begin, maxval); 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (interval.begin < interval.end) { 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ointervals->push_back(interval); 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson oset->count_ += interval.end - interval.begin; 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson interval.begin = it->end; 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson interval.end = maxval; 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (interval.begin < interval.end) { 251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ointervals->push_back(interval); 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson oset->count_ += interval.end - interval.begin; 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Subtract an interval set from the set. Requires intervals be normalized. 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The result is normalized. 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid IntervalSet<T>::Difference(const IntervalSet<T> &iset, 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntervalSet<T> *oset) const { 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (intervals_.empty()) { 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson oset->Intervals()->clear(); 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson oset->count_ = 0; 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntervalSet<T> cset; 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iset.Complement(intervals_.back().end, &cset); 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Intersect(cset, oset); 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Determines if an interval set overlaps with the set. Requires 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// intervals be normalized. 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const { 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Interval> *intervals = iset.Intervals(); 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename vector<Interval>::const_iterator it1 = intervals_.begin(); 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename vector<Interval>::const_iterator it2 = intervals->begin(); 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (it1 != intervals_.end() && it2 != intervals->end()) { 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it1->end <= it2->begin) { 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it1; 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (it2->end <= it1->begin) { 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it2; 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Determines if an interval set overlaps with the set but neither 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// is contained in the other. Requires intervals be normalized. 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const { 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Interval> *intervals = iset.Intervals(); 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename vector<Interval>::const_iterator it1 = intervals_.begin(); 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename vector<Interval>::const_iterator it2 = intervals->begin(); 298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool only1 = false; // point in intervals_ but not intervals 299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool only2 = false; // point in intervals but not intervals_ 300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool overlap = false; // point in both intervals_ and intervals 301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (it1 != intervals_.end() && it2 != intervals->end()) { 303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it1->end <= it2->begin) { // no overlap - it1 first 304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson only1 = true; 305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it1; 306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (it2->end <= it1->begin) { // no overlap - it2 first 307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson only2 = true; 308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it2; 309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals 310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson overlap = true; 311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it1; 312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it2; 313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2 314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson only2 = true; 315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson overlap = true; 316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it1; 317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1 318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson only1 = true; 319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson overlap = true; 320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it2; 321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { // strict overlap 322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson only1 = true; 323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson only2 = true; 324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson overlap = true; 325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (only1 == true && only2 == true && overlap == true) 327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it1 != intervals_.end()) 330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson only1 = true; 331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it2 != intervals->end()) 332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson only2 = true; 333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return only1 == true && only2 == true && overlap == true; 335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Determines if an interval set is contained within the set. Requires 338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// intervals be normalized. 339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> 340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const { 341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (iset.Count() > Count()) 342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Interval> *intervals = iset.Intervals(); 345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename vector<Interval>::const_iterator it1 = intervals_.begin(); 346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename vector<Interval>::const_iterator it2 = intervals->begin(); 347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (it1 != intervals_.end() && it2 != intervals->end()) { 349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it1->end <= it2->begin) { // no overlap - it1 first 350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it1; 351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C 352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (it2->end == it1->end) { 354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it1; 355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it2; 356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it2; 358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return it2 == intervals->end(); 361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename T> 364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonostream &operator<<(ostream &strm, const IntervalSet<T> &s) { 365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename IntervalSet<T>::Interval Interval; 366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Interval> *intervals = s.Intervals(); 367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strm << "{"; 368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename vector<Interval>::const_iterator it = intervals->begin(); 369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson it != intervals->end(); 370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it) { 371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it != intervals->begin()) 372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strm << ","; 373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strm << "[" << it->begin << "," << it->end << ")"; 374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strm << "}"; 376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return strm; 377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_INTERVAL_SET_H__ 382