rangelib.py revision 9a5caf2c30e5dcb19823dff328de1cfb140a2799
1# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22  """A RangeSet represents a set of nonoverlapping ranges on the
23  integers (ie, a set of integers, but efficient when the set contains
24  lots of runs."""
25
26  def __init__(self, data=None):
27    # TODO(tbao): monotonic is broken when passing in a tuple.
28    self.monotonic = False
29    if isinstance(data, str):
30      self._parse_internal(data)
31    elif data:
32      self.data = tuple(self._remove_pairs(data))
33    else:
34      self.data = ()
35
36  def __iter__(self):
37    for i in range(0, len(self.data), 2):
38      yield self.data[i:i+2]
39
40  def __eq__(self, other):
41    return self.data == other.data
42  def __ne__(self, other):
43    return self.data != other.data
44  def __nonzero__(self):
45    return bool(self.data)
46
47  def __str__(self):
48    if not self.data:
49      return "empty"
50    else:
51      return self.to_string()
52
53  def __repr__(self):
54    return '<RangeSet("' + self.to_string() + '")>'
55
56  @classmethod
57  def parse(cls, text):
58    """Parse a text string consisting of a space-separated list of
59    blocks and ranges, eg "10-20 30 35-40".  Ranges are interpreted to
60    include both their ends (so the above example represents 18
61    individual blocks.  Returns a RangeSet object.
62
63    If the input has all its blocks in increasing order, then returned
64    RangeSet will have an extra attribute 'monotonic' that is set to
65    True.  For example the input "10-20 30" is monotonic, but the input
66    "15-20 30 10-14" is not, even though they represent the same set
67    of blocks (and the two RangeSets will compare equal with ==).
68    """
69    return cls(text)
70
71  def _parse_internal(self, text):
72    data = []
73    last = -1
74    monotonic = True
75    for p in text.split():
76      if "-" in p:
77        s, e = p.split("-")
78        data.append(int(s))
79        data.append(int(e)+1)
80        if last <= s <= e:
81          last = e
82        else:
83          monotonic = False
84      else:
85        s = int(p)
86        data.append(s)
87        data.append(s+1)
88        if last <= s:
89          last = s+1
90        else:
91          monotonic = True
92    data.sort()
93    self.data = tuple(self._remove_pairs(data))
94    self.monotonic = monotonic
95
96  @staticmethod
97  def _remove_pairs(source):
98    last = None
99    for i in source:
100      if i == last:
101        last = None
102      else:
103        if last is not None:
104          yield last
105        last = i
106    if last is not None:
107      yield last
108
109  def to_string(self):
110    out = []
111    for i in range(0, len(self.data), 2):
112      s, e = self.data[i:i+2]
113      if e == s+1:
114        out.append(str(s))
115      else:
116        out.append(str(s) + "-" + str(e-1))
117    return " ".join(out)
118
119  def to_string_raw(self):
120    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
121
122  def union(self, other):
123    """Return a new RangeSet representing the union of this RangeSet
124    with the argument.
125
126    >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
127    <RangeSet("10-34")>
128    >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
129    <RangeSet("10-19 22 30-34")>
130    """
131    out = []
132    z = 0
133    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
134                            zip(other.data, itertools.cycle((+1, -1)))):
135      if (z == 0 and d == 1) or (z == 1 and d == -1):
136        out.append(p)
137      z += d
138    return RangeSet(data=out)
139
140  def intersect(self, other):
141    """Return a new RangeSet representing the intersection of this
142    RangeSet with the argument.
143
144    >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
145    <RangeSet("18-19 30-32")>
146    >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
147    <RangeSet("")>
148    """
149    out = []
150    z = 0
151    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
152                            zip(other.data, itertools.cycle((+1, -1)))):
153      if (z == 1 and d == 1) or (z == 2 and d == -1):
154        out.append(p)
155      z += d
156    return RangeSet(data=out)
157
158  def subtract(self, other):
159    """Return a new RangeSet representing subtracting the argument
160    from this RangeSet.
161
162    >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
163    <RangeSet("10-17 33-34")>
164    >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
165    <RangeSet("10-19 30-34")>
166    """
167
168    out = []
169    z = 0
170    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
171                            zip(other.data, itertools.cycle((-1, +1)))):
172      if (z == 0 and d == 1) or (z == 1 and d == -1):
173        out.append(p)
174      z += d
175    return RangeSet(data=out)
176
177  def overlaps(self, other):
178    """Returns true if the argument has a nonempty overlap with this
179    RangeSet.
180
181    >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
182    True
183    >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
184    False
185    """
186
187    # This is like intersect, but we can stop as soon as we discover the
188    # output is going to be nonempty.
189    z = 0
190    for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
191                            zip(other.data, itertools.cycle((+1, -1)))):
192      if (z == 1 and d == 1) or (z == 2 and d == -1):
193        return True
194      z += d
195    return False
196
197  def size(self):
198    """Returns the total size of the RangeSet (ie, how many integers
199    are in the set).
200
201    >>> RangeSet("10-19 30-34").size()
202    15
203    """
204
205    total = 0
206    for i, p in enumerate(self.data):
207      if i % 2:
208        total += p
209      else:
210        total -= p
211    return total
212
213  def map_within(self, other):
214    """'other' should be a subset of 'self'.  Returns a RangeSet
215    representing what 'other' would get translated to if the integers
216    of 'self' were translated down to be contiguous starting at zero.
217
218    >>> RangeSet("0-9").map_within(RangeSet("3-4"))
219    <RangeSet("3-4")>
220    >>> RangeSet("10-19").map_within(RangeSet("13-14"))
221    <RangeSet("3-4")>
222    >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
223    <RangeSet("7-12")>
224    >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
225    <RangeSet("2-3 7-12")>
226    """
227
228    out = []
229    offset = 0
230    start = None
231    for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
232                            zip(other.data, itertools.cycle((-1, +1)))):
233      if d == -5:
234        start = p
235      elif d == +5:
236        offset += p-start
237        start = None
238      else:
239        out.append(offset + p - start)
240    return RangeSet(data=out)
241
242  def extend(self, n):
243    """Extend the RangeSet by 'n' blocks.
244
245    The lower bound is guaranteed to be non-negative.
246
247    >>> RangeSet("0-9").extend(1)
248    <RangeSet("0-10")>
249    >>> RangeSet("10-19").extend(15)
250    <RangeSet("0-34")>
251    >>> RangeSet("10-19 30-39").extend(4)
252    <RangeSet("6-23 26-43")>
253    >>> RangeSet("10-19 30-39").extend(10)
254    <RangeSet("0-49")>
255    """
256    out = self
257    for i in range(0, len(self.data), 2):
258      s, e = self.data[i:i+2]
259      s1 = max(0, s - n)
260      e1 = e + n
261      out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
262    return out
263
264  def first(self, n):
265    """Return the RangeSet that contains at most the first 'n' integers.
266
267    >>> RangeSet("0-9").first(1)
268    <RangeSet("0")>
269    >>> RangeSet("10-19").first(5)
270    <RangeSet("10-14")>
271    >>> RangeSet("10-19").first(15)
272    <RangeSet("10-19")>
273    >>> RangeSet("10-19 30-39").first(3)
274    <RangeSet("10-12")>
275    >>> RangeSet("10-19 30-39").first(15)
276    <RangeSet("10-19 30-34")>
277    >>> RangeSet("10-19 30-39").first(30)
278    <RangeSet("10-19 30-39")>
279    >>> RangeSet("0-9").first(0)
280    <RangeSet("")>
281    """
282
283    if self.size() <= n:
284      return self
285
286    out = []
287    for s, e in self:
288      if e - s >= n:
289        out += (s, s+n)
290        break
291      else:
292        out += (s, e)
293        n -= e - s
294    return RangeSet(data=out)
295
296
297if __name__ == "__main__":
298  import doctest
299  doctest.testmod()
300