rangelib.py revision 8e1fb06f383da53789e79255f8f083a9b8d721f6
1# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22  """A RangeSet represents a set of nonoverlapping ranges on the
23  integers (ie, a set of integers, but efficient when the set contains
24  lots of runs."""
25
26  def __init__(self, data=None):
27    self.monotonic = False
28    if isinstance(data, str):
29      self._parse_internal(data)
30    elif data:
31      assert len(data) % 2 == 0
32      self.data = tuple(self._remove_pairs(data))
33      self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
34    else:
35      self.data = ()
36
37  def __iter__(self):
38    for i in range(0, len(self.data), 2):
39      yield self.data[i:i+2]
40
41  def __eq__(self, other):
42    return self.data == other.data
43
44  def __ne__(self, other):
45    return self.data != other.data
46
47  def __nonzero__(self):
48    return bool(self.data)
49
50  def __str__(self):
51    if not self.data:
52      return "empty"
53    else:
54      return self.to_string()
55
56  def __repr__(self):
57    return '<RangeSet("' + self.to_string() + '")>'
58
59  @classmethod
60  def parse(cls, text):
61    """Parse a text string consisting of a space-separated list of
62    blocks and ranges, eg "10-20 30 35-40".  Ranges are interpreted to
63    include both their ends (so the above example represents 18
64    individual blocks.  Returns a RangeSet object.
65
66    If the input has all its blocks in increasing order, then returned
67    RangeSet will have an extra attribute 'monotonic' that is set to
68    True.  For example the input "10-20 30" is monotonic, but the input
69    "15-20 30 10-14" is not, even though they represent the same set
70    of blocks (and the two RangeSets will compare equal with ==).
71    """
72    return cls(text)
73
74  def _parse_internal(self, text):
75    data = []
76    last = -1
77    monotonic = True
78    for p in text.split():
79      if "-" in p:
80        s, e = (int(x) for x in p.split("-"))
81        data.append(s)
82        data.append(e+1)
83        if last <= s <= e:
84          last = e
85        else:
86          monotonic = False
87      else:
88        s = int(p)
89        data.append(s)
90        data.append(s+1)
91        if last <= s:
92          last = s+1
93        else:
94          monotonic = False
95    data.sort()
96    self.data = tuple(self._remove_pairs(data))
97    self.monotonic = monotonic
98
99  @staticmethod
100  def _remove_pairs(source):
101    """Remove consecutive duplicate items to simplify the result.
102
103    [1, 2, 2, 5, 5, 10] will become [1, 10]."""
104    last = None
105    for i in source:
106      if i == last:
107        last = None
108      else:
109        if last is not None:
110          yield last
111        last = i
112    if last is not None:
113      yield last
114
115  def to_string(self):
116    out = []
117    for i in range(0, len(self.data), 2):
118      s, e = self.data[i:i+2]
119      if e == s+1:
120        out.append(str(s))
121      else:
122        out.append(str(s) + "-" + str(e-1))
123    return " ".join(out)
124
125  def to_string_raw(self):
126    assert self.data
127    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
128
129  def union(self, other):
130    """Return a new RangeSet representing the union of this RangeSet
131    with the argument.
132
133    >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
134    <RangeSet("10-34")>
135    >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
136    <RangeSet("10-19 22 30-34")>
137    """
138    out = []
139    z = 0
140    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
141                            zip(other.data, itertools.cycle((+1, -1)))):
142      if (z == 0 and d == 1) or (z == 1 and d == -1):
143        out.append(p)
144      z += d
145    return RangeSet(data=out)
146
147  def intersect(self, other):
148    """Return a new RangeSet representing the intersection of this
149    RangeSet with the argument.
150
151    >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
152    <RangeSet("18-19 30-32")>
153    >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
154    <RangeSet("")>
155    """
156    out = []
157    z = 0
158    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
159                            zip(other.data, itertools.cycle((+1, -1)))):
160      if (z == 1 and d == 1) or (z == 2 and d == -1):
161        out.append(p)
162      z += d
163    return RangeSet(data=out)
164
165  def subtract(self, other):
166    """Return a new RangeSet representing subtracting the argument
167    from this RangeSet.
168
169    >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
170    <RangeSet("10-17 33-34")>
171    >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
172    <RangeSet("10-19 30-34")>
173    """
174
175    out = []
176    z = 0
177    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
178                            zip(other.data, itertools.cycle((-1, +1)))):
179      if (z == 0 and d == 1) or (z == 1 and d == -1):
180        out.append(p)
181      z += d
182    return RangeSet(data=out)
183
184  def overlaps(self, other):
185    """Returns true if the argument has a nonempty overlap with this
186    RangeSet.
187
188    >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
189    True
190    >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
191    False
192    """
193
194    # This is like intersect, but we can stop as soon as we discover the
195    # output is going to be nonempty.
196    z = 0
197    for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
198                            zip(other.data, itertools.cycle((+1, -1)))):
199      if (z == 1 and d == 1) or (z == 2 and d == -1):
200        return True
201      z += d
202    return False
203
204  def size(self):
205    """Returns the total size of the RangeSet (ie, how many integers
206    are in the set).
207
208    >>> RangeSet("10-19 30-34").size()
209    15
210    """
211
212    total = 0
213    for i, p in enumerate(self.data):
214      if i % 2:
215        total += p
216      else:
217        total -= p
218    return total
219
220  def map_within(self, other):
221    """'other' should be a subset of 'self'.  Returns a RangeSet
222    representing what 'other' would get translated to if the integers
223    of 'self' were translated down to be contiguous starting at zero.
224
225    >>> RangeSet("0-9").map_within(RangeSet("3-4"))
226    <RangeSet("3-4")>
227    >>> RangeSet("10-19").map_within(RangeSet("13-14"))
228    <RangeSet("3-4")>
229    >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
230    <RangeSet("7-12")>
231    >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
232    <RangeSet("2-3 7-12")>
233    """
234
235    out = []
236    offset = 0
237    start = None
238    for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
239                            zip(other.data, itertools.cycle((-1, +1)))):
240      if d == -5:
241        start = p
242      elif d == +5:
243        offset += p-start
244        start = None
245      else:
246        out.append(offset + p - start)
247    return RangeSet(data=out)
248
249  def extend(self, n):
250    """Extend the RangeSet by 'n' blocks.
251
252    The lower bound is guaranteed to be non-negative.
253
254    >>> RangeSet("0-9").extend(1)
255    <RangeSet("0-10")>
256    >>> RangeSet("10-19").extend(15)
257    <RangeSet("0-34")>
258    >>> RangeSet("10-19 30-39").extend(4)
259    <RangeSet("6-23 26-43")>
260    >>> RangeSet("10-19 30-39").extend(10)
261    <RangeSet("0-49")>
262    """
263    out = self
264    for i in range(0, len(self.data), 2):
265      s, e = self.data[i:i+2]
266      s1 = max(0, s - n)
267      e1 = e + n
268      out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
269    return out
270
271  def first(self, n):
272    """Return the RangeSet that contains at most the first 'n' integers.
273
274    >>> RangeSet("0-9").first(1)
275    <RangeSet("0")>
276    >>> RangeSet("10-19").first(5)
277    <RangeSet("10-14")>
278    >>> RangeSet("10-19").first(15)
279    <RangeSet("10-19")>
280    >>> RangeSet("10-19 30-39").first(3)
281    <RangeSet("10-12")>
282    >>> RangeSet("10-19 30-39").first(15)
283    <RangeSet("10-19 30-34")>
284    >>> RangeSet("10-19 30-39").first(30)
285    <RangeSet("10-19 30-39")>
286    >>> RangeSet("0-9").first(0)
287    <RangeSet("")>
288    """
289
290    if self.size() <= n:
291      return self
292
293    out = []
294    for s, e in self:
295      if e - s >= n:
296        out += (s, s+n)
297        break
298      else:
299        out += (s, e)
300        n -= e - s
301    return RangeSet(data=out)
302
303
304if __name__ == "__main__":
305  import doctest
306  doctest.testmod()
307