rangelib.py revision 724fb897fcb1ec783a87ebe5801ae3a2b68edfb5
1from __future__ import print_function 2import heapq 3import itertools 4 5__all__ = ["RangeSet"] 6 7class RangeSet(object): 8 """A RangeSet represents a set of nonoverlapping ranges on the 9 integers (ie, a set of integers, but efficient when the set contains 10 lots of runs.""" 11 12 def __init__(self, data=None): 13 if data: 14 self.data = tuple(self._remove_pairs(data)) 15 else: 16 self.data = () 17 18 def __iter__(self): 19 for i in range(0, len(self.data), 2): 20 yield self.data[i:i+2] 21 22 def __eq__(self, other): 23 return self.data == other.data 24 def __ne__(self, other): 25 return self.data != other.data 26 def __nonzero__(self): 27 return bool(self.data) 28 29 def __str__(self): 30 if not self.data: 31 return "empty" 32 else: 33 return self.to_string() 34 35 @classmethod 36 def parse(cls, text): 37 """Parse a text string consisting of a space-separated list of 38 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to 39 include both their ends (so the above example represents 18 40 individual blocks. Returns a RangeSet object. 41 42 If the input has all its blocks in increasing order, then returned 43 RangeSet will have an extra attribute 'monotonic' that is set to 44 True. For example the input "10-20 30" is monotonic, but the input 45 "15-20 30 10-14" is not, even though they represent the same set 46 of blocks (and the two RangeSets will compare equal with ==). 47 """ 48 49 data = [] 50 last = -1 51 monotonic = True 52 for p in text.split(): 53 if "-" in p: 54 s, e = p.split("-") 55 data.append(int(s)) 56 data.append(int(e)+1) 57 if last <= s <= e: 58 last = e 59 else: 60 monotonic = False 61 else: 62 s = int(p) 63 data.append(s) 64 data.append(s+1) 65 if last <= s: 66 last = s+1 67 else: 68 monotonic = True 69 data.sort() 70 r = RangeSet(cls._remove_pairs(data)) 71 r.monotonic = monotonic 72 return r 73 74 @staticmethod 75 def _remove_pairs(source): 76 last = None 77 for i in source: 78 if i == last: 79 last = None 80 else: 81 if last is not None: 82 yield last 83 last = i 84 if last is not None: 85 yield last 86 87 def to_string(self): 88 out = [] 89 for i in range(0, len(self.data), 2): 90 s, e = self.data[i:i+2] 91 if e == s+1: 92 out.append(str(s)) 93 else: 94 out.append(str(s) + "-" + str(e-1)) 95 return " ".join(out) 96 97 def to_string_raw(self): 98 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data) 99 100 def union(self, other): 101 """Return a new RangeSet representing the union of this RangeSet 102 with the argument.""" 103 out = [] 104 z = 0 105 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 106 zip(other.data, itertools.cycle((+1, -1)))): 107 if (z == 0 and d == 1) or (z == 1 and d == -1): 108 out.append(p) 109 z += d 110 return RangeSet(data=out) 111 112 def intersect(self, other): 113 """Return a new RangeSet representing the intersection of this 114 RangeSet with the argument.""" 115 out = [] 116 z = 0 117 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 118 zip(other.data, itertools.cycle((+1, -1)))): 119 if (z == 1 and d == 1) or (z == 2 and d == -1): 120 out.append(p) 121 z += d 122 return RangeSet(data=out) 123 124 def subtract(self, other): 125 """Return a new RangeSet representing subtracting the argument 126 from this RangeSet.""" 127 128 out = [] 129 z = 0 130 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 131 zip(other.data, itertools.cycle((-1, +1)))): 132 if (z == 0 and d == 1) or (z == 1 and d == -1): 133 out.append(p) 134 z += d 135 return RangeSet(data=out) 136 137 def overlaps(self, other): 138 """Returns true if the argument has a nonempty overlap with this 139 RangeSet.""" 140 141 # This is like intersect, but we can stop as soon as we discover the 142 # output is going to be nonempty. 143 z = 0 144 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 145 zip(other.data, itertools.cycle((+1, -1)))): 146 if (z == 1 and d == 1) or (z == 2 and d == -1): 147 return True 148 z += d 149 return False 150 151 def size(self): 152 """Returns the total size of the RangeSet (ie, how many integers 153 are in the set).""" 154 155 total = 0 156 for i, p in enumerate(self.data): 157 if i % 2: 158 total += p 159 else: 160 total -= p 161 return total 162