rangelib.py revision 724fb897fcb1ec783a87ebe5801ae3a2b68edfb5
1from __future__ import print_function
2import heapq
3import itertools
4
5__all__ = ["RangeSet"]
6
7class RangeSet(object):
8  """A RangeSet represents a set of nonoverlapping ranges on the
9  integers (ie, a set of integers, but efficient when the set contains
10  lots of runs."""
11
12  def __init__(self, data=None):
13    if data:
14      self.data = tuple(self._remove_pairs(data))
15    else:
16      self.data = ()
17
18  def __iter__(self):
19    for i in range(0, len(self.data), 2):
20      yield self.data[i:i+2]
21
22  def __eq__(self, other):
23    return self.data == other.data
24  def __ne__(self, other):
25    return self.data != other.data
26  def __nonzero__(self):
27    return bool(self.data)
28
29  def __str__(self):
30    if not self.data:
31      return "empty"
32    else:
33      return self.to_string()
34
35  @classmethod
36  def parse(cls, text):
37    """Parse a text string consisting of a space-separated list of
38    blocks and ranges, eg "10-20 30 35-40".  Ranges are interpreted to
39    include both their ends (so the above example represents 18
40    individual blocks.  Returns a RangeSet object.
41
42    If the input has all its blocks in increasing order, then returned
43    RangeSet will have an extra attribute 'monotonic' that is set to
44    True.  For example the input "10-20 30" is monotonic, but the input
45    "15-20 30 10-14" is not, even though they represent the same set
46    of blocks (and the two RangeSets will compare equal with ==).
47    """
48
49    data = []
50    last = -1
51    monotonic = True
52    for p in text.split():
53      if "-" in p:
54        s, e = p.split("-")
55        data.append(int(s))
56        data.append(int(e)+1)
57        if last <= s <= e:
58          last = e
59        else:
60          monotonic = False
61      else:
62        s = int(p)
63        data.append(s)
64        data.append(s+1)
65        if last <= s:
66          last = s+1
67        else:
68          monotonic = True
69    data.sort()
70    r = RangeSet(cls._remove_pairs(data))
71    r.monotonic = monotonic
72    return r
73
74  @staticmethod
75  def _remove_pairs(source):
76    last = None
77    for i in source:
78      if i == last:
79        last = None
80      else:
81        if last is not None:
82          yield last
83        last = i
84    if last is not None:
85      yield last
86
87  def to_string(self):
88    out = []
89    for i in range(0, len(self.data), 2):
90      s, e = self.data[i:i+2]
91      if e == s+1:
92        out.append(str(s))
93      else:
94        out.append(str(s) + "-" + str(e-1))
95    return " ".join(out)
96
97  def to_string_raw(self):
98    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
99
100  def union(self, other):
101    """Return a new RangeSet representing the union of this RangeSet
102    with the argument."""
103    out = []
104    z = 0
105    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
106                            zip(other.data, itertools.cycle((+1, -1)))):
107      if (z == 0 and d == 1) or (z == 1 and d == -1):
108        out.append(p)
109      z += d
110    return RangeSet(data=out)
111
112  def intersect(self, other):
113    """Return a new RangeSet representing the intersection of this
114    RangeSet with the argument."""
115    out = []
116    z = 0
117    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
118                            zip(other.data, itertools.cycle((+1, -1)))):
119      if (z == 1 and d == 1) or (z == 2 and d == -1):
120        out.append(p)
121      z += d
122    return RangeSet(data=out)
123
124  def subtract(self, other):
125    """Return a new RangeSet representing subtracting the argument
126    from this RangeSet."""
127
128    out = []
129    z = 0
130    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
131                            zip(other.data, itertools.cycle((-1, +1)))):
132      if (z == 0 and d == 1) or (z == 1 and d == -1):
133        out.append(p)
134      z += d
135    return RangeSet(data=out)
136
137  def overlaps(self, other):
138    """Returns true if the argument has a nonempty overlap with this
139    RangeSet."""
140
141    # This is like intersect, but we can stop as soon as we discover the
142    # output is going to be nonempty.
143    z = 0
144    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
145                            zip(other.data, itertools.cycle((+1, -1)))):
146      if (z == 1 and d == 1) or (z == 2 and d == -1):
147        return True
148      z += d
149    return False
150
151  def size(self):
152    """Returns the total size of the RangeSet (ie, how many integers
153    are in the set)."""
154
155    total = 0
156    for i, p in enumerate(self.data):
157      if i % 2:
158        total += p
159      else:
160        total -= p
161    return total
162