rangelib.py revision 2fd2c9bfd6c9beeaeccb4632b785227fe56c6006
1# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22  """A RangeSet represents a set of nonoverlapping ranges on the
23  integers (ie, a set of integers, but efficient when the set contains
24  lots of runs."""
25
26  def __init__(self, data=None):
27    self.monotonic = False
28    if isinstance(data, str):
29      self._parse_internal(data)
30    elif data:
31      self.data = tuple(self._remove_pairs(data))
32    else:
33      self.data = ()
34
35  def __iter__(self):
36    for i in range(0, len(self.data), 2):
37      yield self.data[i:i+2]
38
39  def __eq__(self, other):
40    return self.data == other.data
41  def __ne__(self, other):
42    return self.data != other.data
43  def __nonzero__(self):
44    return bool(self.data)
45
46  def __str__(self):
47    if not self.data:
48      return "empty"
49    else:
50      return self.to_string()
51
52  def __repr__(self):
53    return '<RangeSet("' + self.to_string() + '")>'
54
55  @classmethod
56  def parse(cls, text):
57    """Parse a text string consisting of a space-separated list of
58    blocks and ranges, eg "10-20 30 35-40".  Ranges are interpreted to
59    include both their ends (so the above example represents 18
60    individual blocks.  Returns a RangeSet object.
61
62    If the input has all its blocks in increasing order, then returned
63    RangeSet will have an extra attribute 'monotonic' that is set to
64    True.  For example the input "10-20 30" is monotonic, but the input
65    "15-20 30 10-14" is not, even though they represent the same set
66    of blocks (and the two RangeSets will compare equal with ==).
67    """
68    return cls(text)
69
70  def _parse_internal(self, text):
71    data = []
72    last = -1
73    monotonic = True
74    for p in text.split():
75      if "-" in p:
76        s, e = p.split("-")
77        data.append(int(s))
78        data.append(int(e)+1)
79        if last <= s <= e:
80          last = e
81        else:
82          monotonic = False
83      else:
84        s = int(p)
85        data.append(s)
86        data.append(s+1)
87        if last <= s:
88          last = s+1
89        else:
90          monotonic = True
91    data.sort()
92    self.data = tuple(self._remove_pairs(data))
93    self.monotonic = monotonic
94
95  @staticmethod
96  def _remove_pairs(source):
97    last = None
98    for i in source:
99      if i == last:
100        last = None
101      else:
102        if last is not None:
103          yield last
104        last = i
105    if last is not None:
106      yield last
107
108  def to_string(self):
109    out = []
110    for i in range(0, len(self.data), 2):
111      s, e = self.data[i:i+2]
112      if e == s+1:
113        out.append(str(s))
114      else:
115        out.append(str(s) + "-" + str(e-1))
116    return " ".join(out)
117
118  def to_string_raw(self):
119    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
120
121  def union(self, other):
122    """Return a new RangeSet representing the union of this RangeSet
123    with the argument.
124
125    >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
126    <RangeSet("10-34")>
127    >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
128    <RangeSet("10-19 22 30-34")>
129    """
130    out = []
131    z = 0
132    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
133                            zip(other.data, itertools.cycle((+1, -1)))):
134      if (z == 0 and d == 1) or (z == 1 and d == -1):
135        out.append(p)
136      z += d
137    return RangeSet(data=out)
138
139  def intersect(self, other):
140    """Return a new RangeSet representing the intersection of this
141    RangeSet with the argument.
142
143    >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
144    <RangeSet("18-19 30-32")>
145    >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
146    <RangeSet("")>
147    """
148    out = []
149    z = 0
150    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
151                            zip(other.data, itertools.cycle((+1, -1)))):
152      if (z == 1 and d == 1) or (z == 2 and d == -1):
153        out.append(p)
154      z += d
155    return RangeSet(data=out)
156
157  def subtract(self, other):
158    """Return a new RangeSet representing subtracting the argument
159    from this RangeSet.
160
161    >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
162    <RangeSet("10-17 33-34")>
163    >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
164    <RangeSet("10-19 30-34")>
165    """
166
167    out = []
168    z = 0
169    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
170                            zip(other.data, itertools.cycle((-1, +1)))):
171      if (z == 0 and d == 1) or (z == 1 and d == -1):
172        out.append(p)
173      z += d
174    return RangeSet(data=out)
175
176  def overlaps(self, other):
177    """Returns true if the argument has a nonempty overlap with this
178    RangeSet.
179
180    >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
181    True
182    >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
183    False
184    """
185
186    # This is like intersect, but we can stop as soon as we discover the
187    # output is going to be nonempty.
188    z = 0
189    for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
190                            zip(other.data, itertools.cycle((+1, -1)))):
191      if (z == 1 and d == 1) or (z == 2 and d == -1):
192        return True
193      z += d
194    return False
195
196  def size(self):
197    """Returns the total size of the RangeSet (ie, how many integers
198    are in the set).
199
200    >>> RangeSet("10-19 30-34").size()
201    15
202    """
203
204    total = 0
205    for i, p in enumerate(self.data):
206      if i % 2:
207        total += p
208      else:
209        total -= p
210    return total
211
212  def map_within(self, other):
213    """'other' should be a subset of 'self'.  Returns a RangeSet
214    representing what 'other' would get translated to if the integers
215    of 'self' were translated down to be contiguous starting at zero.
216
217    >>> RangeSet("0-9").map_within(RangeSet("3-4"))
218    <RangeSet("3-4")>
219    >>> RangeSet("10-19").map_within(RangeSet("13-14"))
220    <RangeSet("3-4")>
221    >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
222    <RangeSet("7-12")>
223    >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
224    <RangeSet("2-3 7-12")>
225    """
226
227    out = []
228    offset = 0
229    start = None
230    for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
231                            zip(other.data, itertools.cycle((-1, +1)))):
232      if d == -5:
233        start = p
234      elif d == +5:
235        offset += p-start
236        start = None
237      else:
238        out.append(offset + p - start)
239    return RangeSet(data=out)
240
241  def extend(self, n):
242    """Extend the RangeSet by 'n' blocks.
243
244    The lower bound is guaranteed to be non-negative.
245
246    >>> RangeSet("0-9").extend(1)
247    <RangeSet("0-10")>
248    >>> RangeSet("10-19").extend(15)
249    <RangeSet("0-34")>
250    >>> RangeSet("10-19 30-39").extend(4)
251    <RangeSet("6-23 26-43")>
252    >>> RangeSet("10-19 30-39").extend(10)
253    <RangeSet("0-49")>
254    """
255    out = self
256    for i in range(0, len(self.data), 2):
257      s, e = self.data[i:i+2]
258      s1 = max(0, s - n)
259      e1 = e + n
260      out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
261    return out
262
263
264if __name__ == "__main__":
265  import doctest
266  doctest.testmod()
267