rangelib.py revision 9a751322e1fad2625a59a8304e83b99b6d012cbb
1# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22  """A RangeSet represents a set of nonoverlapping ranges on the
23  integers (ie, a set of integers, but efficient when the set contains
24  lots of runs."""
25
26  def __init__(self, data=None):
27    if isinstance(data, str):
28      self._parse_internal(data)
29    elif data:
30      self.data = tuple(self._remove_pairs(data))
31    else:
32      self.data = ()
33
34  def __iter__(self):
35    for i in range(0, len(self.data), 2):
36      yield self.data[i:i+2]
37
38  def __eq__(self, other):
39    return self.data == other.data
40  def __ne__(self, other):
41    return self.data != other.data
42  def __nonzero__(self):
43    return bool(self.data)
44
45  def __str__(self):
46    if not self.data:
47      return "empty"
48    else:
49      return self.to_string()
50
51  def __repr__(self):
52    return '<RangeSet("' + self.to_string() + '")>'
53
54  @classmethod
55  def parse(cls, text):
56    """Parse a text string consisting of a space-separated list of
57    blocks and ranges, eg "10-20 30 35-40".  Ranges are interpreted to
58    include both their ends (so the above example represents 18
59    individual blocks.  Returns a RangeSet object.
60
61    If the input has all its blocks in increasing order, then returned
62    RangeSet will have an extra attribute 'monotonic' that is set to
63    True.  For example the input "10-20 30" is monotonic, but the input
64    "15-20 30 10-14" is not, even though they represent the same set
65    of blocks (and the two RangeSets will compare equal with ==).
66    """
67    return cls(text)
68
69  def _parse_internal(self, text):
70    data = []
71    last = -1
72    monotonic = True
73    for p in text.split():
74      if "-" in p:
75        s, e = p.split("-")
76        data.append(int(s))
77        data.append(int(e)+1)
78        if last <= s <= e:
79          last = e
80        else:
81          monotonic = False
82      else:
83        s = int(p)
84        data.append(s)
85        data.append(s+1)
86        if last <= s:
87          last = s+1
88        else:
89          monotonic = True
90    data.sort()
91    self.data = tuple(self._remove_pairs(data))
92    self.monotonic = monotonic
93
94  @staticmethod
95  def _remove_pairs(source):
96    last = None
97    for i in source:
98      if i == last:
99        last = None
100      else:
101        if last is not None:
102          yield last
103        last = i
104    if last is not None:
105      yield last
106
107  def to_string(self):
108    out = []
109    for i in range(0, len(self.data), 2):
110      s, e = self.data[i:i+2]
111      if e == s+1:
112        out.append(str(s))
113      else:
114        out.append(str(s) + "-" + str(e-1))
115    return " ".join(out)
116
117  def to_string_raw(self):
118    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
119
120  def union(self, other):
121    """Return a new RangeSet representing the union of this RangeSet
122    with the argument.
123
124    >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
125    <RangeSet("10-34")>
126    >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
127    <RangeSet("10-19 22 30-34")>
128    """
129    out = []
130    z = 0
131    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
132                            zip(other.data, itertools.cycle((+1, -1)))):
133      if (z == 0 and d == 1) or (z == 1 and d == -1):
134        out.append(p)
135      z += d
136    return RangeSet(data=out)
137
138  def intersect(self, other):
139    """Return a new RangeSet representing the intersection of this
140    RangeSet with the argument.
141
142    >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
143    <RangeSet("18-19 30-32")>
144    >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
145    <RangeSet("")>
146    """
147    out = []
148    z = 0
149    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
150                            zip(other.data, itertools.cycle((+1, -1)))):
151      if (z == 1 and d == 1) or (z == 2 and d == -1):
152        out.append(p)
153      z += d
154    return RangeSet(data=out)
155
156  def subtract(self, other):
157    """Return a new RangeSet representing subtracting the argument
158    from this RangeSet.
159
160    >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
161    <RangeSet("10-17 33-34")>
162    >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
163    <RangeSet("10-19 30-34")>
164    """
165
166    out = []
167    z = 0
168    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
169                            zip(other.data, itertools.cycle((-1, +1)))):
170      if (z == 0 and d == 1) or (z == 1 and d == -1):
171        out.append(p)
172      z += d
173    return RangeSet(data=out)
174
175  def overlaps(self, other):
176    """Returns true if the argument has a nonempty overlap with this
177    RangeSet.
178
179    >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
180    True
181    >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
182    False
183    """
184
185    # This is like intersect, but we can stop as soon as we discover the
186    # output is going to be nonempty.
187    z = 0
188    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
189                            zip(other.data, itertools.cycle((+1, -1)))):
190      if (z == 1 and d == 1) or (z == 2 and d == -1):
191        return True
192      z += d
193    return False
194
195  def size(self):
196    """Returns the total size of the RangeSet (ie, how many integers
197    are in the set).
198
199    >>> RangeSet("10-19 30-34").size()
200    15
201    """
202
203    total = 0
204    for i, p in enumerate(self.data):
205      if i % 2:
206        total += p
207      else:
208        total -= p
209    return total
210
211  def map_within(self, other):
212    """'other' should be a subset of 'self'.  Returns a RangeSet
213    representing what 'other' would get translated to if the integers
214    of 'self' were translated down to be contiguous starting at zero.
215
216    >>> RangeSet("0-9").map_within(RangeSet("3-4"))
217    <RangeSet("3-4")>
218    >>> RangeSet("10-19").map_within(RangeSet("13-14"))
219    <RangeSet("3-4")>
220    >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
221    <RangeSet("7-12")>
222    >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
223    <RangeSet("2-3 7-12")>
224    """
225
226    out = []
227    offset = 0
228    start = None
229    for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
230                            zip(other.data, itertools.cycle((-1, +1)))):
231      if d == -5:
232        start = p
233      elif d == +5:
234        offset += p-start
235        start = None
236      else:
237        out.append(offset + p - start)
238    return RangeSet(data=out)
239
240
241if __name__ == "__main__":
242  import doctest
243  doctest.testmod()
244