1// interval-set.h 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: riley@google.com (Michael Riley) 17// 18// \file 19// Class to represent and operate on sets of intervals. 20 21#ifndef FST_LIB_INTERVAL_SET_H__ 22#define FST_LIB_INTERVAL_SET_H__ 23 24#include <iostream> 25#include <vector> 26using std::vector; 27 28 29#include <fst/util.h> 30 31 32namespace fst { 33 34// Stores and operates on a set of half-open integral intervals [a,b) 35// of signed integers of type T. 36template <typename T> 37class IntervalSet { 38 public: 39 struct Interval { 40 T begin; 41 T end; 42 43 Interval() : begin(-1), end(-1) {} 44 45 Interval(T b, T e) : begin(b), end(e) {} 46 47 bool operator<(const Interval &i) const { 48 return begin < i.begin || (begin == i.begin && end > i.end); 49 } 50 51 bool operator==(const Interval &i) const { 52 return begin == i.begin && end == i.end; 53 } 54 55 bool operator!=(const Interval &i) const { 56 return begin != i.begin || end != i.end; 57 } 58 59 istream &Read(istream &strm) { 60 T n; 61 ReadType(strm, &n); 62 begin = n; 63 ReadType(strm, &n); 64 end = n; 65 return strm; 66 } 67 68 ostream &Write(ostream &strm) const { 69 T n = begin; 70 WriteType(strm, n); 71 n = end; 72 WriteType(strm, n); 73 return strm; 74 } 75 }; 76 77 IntervalSet() : count_(-1) {} 78 79 // Returns the interval set as a vector. 80 vector<Interval> *Intervals() { return &intervals_; } 81 82 const vector<Interval> *Intervals() const { return &intervals_; } 83 84 bool Empty() const { return intervals_.empty(); } 85 86 T Size() const { return intervals_.size(); } 87 88 // Number of points in the intervals (undefined if not normalized). 89 T Count() const { return count_; } 90 91 void Clear() { 92 intervals_.clear(); 93 count_ = 0; 94 } 95 96 // Adds an interval set to the set. The result may not be normalized. 97 void Union(const IntervalSet<T> &iset) { 98 const vector<Interval> *intervals = iset.Intervals(); 99 for (typename vector<Interval>::const_iterator it = intervals->begin(); 100 it != intervals->end(); ++it) 101 intervals_.push_back(*it); 102 } 103 104 // Requires intervals be normalized. 105 bool Member(T value) const { 106 Interval interval(value, value); 107 typename vector<Interval>::const_iterator lb = 108 lower_bound(intervals_.begin(), intervals_.end(), interval); 109 if (lb == intervals_.begin()) 110 return false; 111 return (--lb)->end > value; 112 } 113 114 // Requires intervals be normalized. 115 bool operator==(const IntervalSet<T>& iset) const { 116 return *(iset.Intervals()) == intervals_; 117 } 118 119 // Requires intervals be normalized. 120 bool operator!=(const IntervalSet<T>& iset) const { 121 return *(iset.Intervals()) != intervals_; 122 } 123 124 bool Singleton() const { 125 return intervals_.size() == 1 && 126 intervals_[0].begin + 1 == intervals_[0].end; 127 } 128 129 130 // Sorts; collapses overlapping and adjacent interals; sets count. 131 void Normalize(); 132 133 // Intersects an interval set with the set. Requires intervals be 134 // normalized. The result is normalized. 135 void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const; 136 137 // Complements the set w.r.t [0, maxval). Requires intervals be 138 // normalized. The result is normalized. 139 void Complement(T maxval, IntervalSet<T> *oset) const; 140 141 // Subtract an interval set from the set. Requires intervals be 142 // normalized. The result is normalized. 143 void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const; 144 145 // Determines if an interval set overlaps with the set. Requires 146 // intervals be normalized. 147 bool Overlaps(const IntervalSet<T> &iset) const; 148 149 // Determines if an interval set overlaps with the set but neither 150 // is contained in the other. Requires intervals be normalized. 151 bool StrictlyOverlaps(const IntervalSet<T> &iset) const; 152 153 // Determines if an interval set is contained within the set. Requires 154 // intervals be normalized. 155 bool Contains(const IntervalSet<T> &iset) const; 156 157 istream &Read(istream &strm) { 158 ReadType(strm, &intervals_); 159 return ReadType(strm, &count_); 160 } 161 162 ostream &Write(ostream &strm) const { 163 WriteType(strm, intervals_); 164 return WriteType(strm, count_); 165 } 166 167 private: 168 vector<Interval> intervals_; 169 T count_; 170}; 171 172// Sorts; collapses overlapping and adjacent interavls; sets count. 173template <typename T> 174void IntervalSet<T>::Normalize() { 175 sort(intervals_.begin(), intervals_.end()); 176 177 count_ = 0; 178 T size = 0; 179 for (T i = 0; i < intervals_.size(); ++i) { 180 Interval &inti = intervals_[i]; 181 if (inti.begin == inti.end) 182 continue; 183 for (T j = i + 1; j < intervals_.size(); ++j) { 184 Interval &intj = intervals_[j]; 185 if (intj.begin > inti.end) 186 break; 187 if (intj.end > inti.end) 188 inti.end = intj.end; 189 ++i; 190 } 191 count_ += inti.end - inti.begin; 192 intervals_[size++] = inti; 193 } 194 intervals_.resize(size); 195} 196 197// Intersects an interval set with the set. Requires intervals be normalized. 198// The result is normalized. 199template <typename T> 200void IntervalSet<T>::Intersect(const IntervalSet<T> &iset, 201 IntervalSet<T> *oset) const { 202 const vector<Interval> *iintervals = iset.Intervals(); 203 vector<Interval> *ointervals = oset->Intervals(); 204 typename vector<Interval>::const_iterator it1 = intervals_.begin(); 205 typename vector<Interval>::const_iterator it2 = iintervals->begin(); 206 207 ointervals->clear(); 208 oset->count_ = 0; 209 210 while (it1 != intervals_.end() && it2 != iintervals->end()) { 211 if (it1->end <= it2->begin) { 212 ++it1; 213 } else if (it2->end <= it1->begin) { 214 ++it2; 215 } else { 216 Interval interval; 217 interval.begin = max(it1->begin, it2->begin); 218 interval.end = min(it1->end, it2->end); 219 ointervals->push_back(interval); 220 oset->count_ += interval.end - interval.begin; 221 if (it1->end < it2->end) 222 ++it1; 223 else 224 ++it2; 225 } 226 } 227} 228 229// Complements the set w.r.t [0, maxval). Requires intervals be normalized. 230// The result is normalized. 231template <typename T> 232void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const { 233 vector<Interval> *ointervals = oset->Intervals(); 234 ointervals->clear(); 235 oset->count_ = 0; 236 237 Interval interval; 238 interval.begin = 0; 239 for (typename vector<Interval>::const_iterator it = intervals_.begin(); 240 it != intervals_.end(); 241 ++it) { 242 interval.end = min(it->begin, maxval); 243 if (interval.begin < interval.end) { 244 ointervals->push_back(interval); 245 oset->count_ += interval.end - interval.begin; 246 } 247 interval.begin = it->end; 248 } 249 interval.end = maxval; 250 if (interval.begin < interval.end) { 251 ointervals->push_back(interval); 252 oset->count_ += interval.end - interval.begin; 253 } 254} 255 256// Subtract an interval set from the set. Requires intervals be normalized. 257// The result is normalized. 258template <typename T> 259void IntervalSet<T>::Difference(const IntervalSet<T> &iset, 260 IntervalSet<T> *oset) const { 261 if (intervals_.empty()) { 262 oset->Intervals()->clear(); 263 oset->count_ = 0; 264 } else { 265 IntervalSet<T> cset; 266 iset.Complement(intervals_.back().end, &cset); 267 Intersect(cset, oset); 268 } 269} 270 271// Determines if an interval set overlaps with the set. Requires 272// intervals be normalized. 273template <typename T> 274bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const { 275 const vector<Interval> *intervals = iset.Intervals(); 276 typename vector<Interval>::const_iterator it1 = intervals_.begin(); 277 typename vector<Interval>::const_iterator it2 = intervals->begin(); 278 279 while (it1 != intervals_.end() && it2 != intervals->end()) { 280 if (it1->end <= it2->begin) { 281 ++it1; 282 } else if (it2->end <= it1->begin) { 283 ++it2; 284 } else { 285 return true; 286 } 287 } 288 return false; 289} 290 291// Determines if an interval set overlaps with the set but neither 292// is contained in the other. Requires intervals be normalized. 293template <typename T> 294bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const { 295 const vector<Interval> *intervals = iset.Intervals(); 296 typename vector<Interval>::const_iterator it1 = intervals_.begin(); 297 typename vector<Interval>::const_iterator it2 = intervals->begin(); 298 bool only1 = false; // point in intervals_ but not intervals 299 bool only2 = false; // point in intervals but not intervals_ 300 bool overlap = false; // point in both intervals_ and intervals 301 302 while (it1 != intervals_.end() && it2 != intervals->end()) { 303 if (it1->end <= it2->begin) { // no overlap - it1 first 304 only1 = true; 305 ++it1; 306 } else if (it2->end <= it1->begin) { // no overlap - it2 first 307 only2 = true; 308 ++it2; 309 } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals 310 overlap = true; 311 ++it1; 312 ++it2; 313 } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2 314 only2 = true; 315 overlap = true; 316 ++it1; 317 } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1 318 only1 = true; 319 overlap = true; 320 ++it2; 321 } else { // strict overlap 322 only1 = true; 323 only2 = true; 324 overlap = true; 325 } 326 if (only1 == true && only2 == true && overlap == true) 327 return true; 328 } 329 if (it1 != intervals_.end()) 330 only1 = true; 331 if (it2 != intervals->end()) 332 only2 = true; 333 334 return only1 == true && only2 == true && overlap == true; 335} 336 337// Determines if an interval set is contained within the set. Requires 338// intervals be normalized. 339template <typename T> 340bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const { 341 if (iset.Count() > Count()) 342 return false; 343 344 const vector<Interval> *intervals = iset.Intervals(); 345 typename vector<Interval>::const_iterator it1 = intervals_.begin(); 346 typename vector<Interval>::const_iterator it2 = intervals->begin(); 347 348 while (it1 != intervals_.end() && it2 != intervals->end()) { 349 if (it1->end <= it2->begin) { // no overlap - it1 first 350 ++it1; 351 } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C 352 return false; 353 } else if (it2->end == it1->end) { 354 ++it1; 355 ++it2; 356 } else { 357 ++it2; 358 } 359 } 360 return it2 == intervals->end(); 361} 362 363template <typename T> 364ostream &operator<<(ostream &strm, const IntervalSet<T> &s) { 365 typedef typename IntervalSet<T>::Interval Interval; 366 const vector<Interval> *intervals = s.Intervals(); 367 strm << "{"; 368 for (typename vector<Interval>::const_iterator it = intervals->begin(); 369 it != intervals->end(); 370 ++it) { 371 if (it != intervals->begin()) 372 strm << ","; 373 strm << "[" << it->begin << "," << it->end << ")"; 374 } 375 strm << "}"; 376 return strm; 377} 378 379} // namespace fst 380 381#endif // FST_LIB_INTERVAL_SET_H__ 382