rangelib.py revision 424296a4e811f57ada27be68d60aeb5fcb1bb45b
1# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22  """A RangeSet represents a set of nonoverlapping ranges on the
23  integers (ie, a set of integers, but efficient when the set contains
24  lots of runs."""
25
26  def __init__(self, data=None):
27    if data:
28      self.data = tuple(self._remove_pairs(data))
29    else:
30      self.data = ()
31
32  def __iter__(self):
33    for i in range(0, len(self.data), 2):
34      yield self.data[i:i+2]
35
36  def __eq__(self, other):
37    return self.data == other.data
38  def __ne__(self, other):
39    return self.data != other.data
40  def __nonzero__(self):
41    return bool(self.data)
42
43  def __str__(self):
44    if not self.data:
45      return "empty"
46    else:
47      return self.to_string()
48
49  @classmethod
50  def parse(cls, text):
51    """Parse a text string consisting of a space-separated list of
52    blocks and ranges, eg "10-20 30 35-40".  Ranges are interpreted to
53    include both their ends (so the above example represents 18
54    individual blocks.  Returns a RangeSet object.
55
56    If the input has all its blocks in increasing order, then returned
57    RangeSet will have an extra attribute 'monotonic' that is set to
58    True.  For example the input "10-20 30" is monotonic, but the input
59    "15-20 30 10-14" is not, even though they represent the same set
60    of blocks (and the two RangeSets will compare equal with ==).
61    """
62
63    data = []
64    last = -1
65    monotonic = True
66    for p in text.split():
67      if "-" in p:
68        s, e = p.split("-")
69        data.append(int(s))
70        data.append(int(e)+1)
71        if last <= s <= e:
72          last = e
73        else:
74          monotonic = False
75      else:
76        s = int(p)
77        data.append(s)
78        data.append(s+1)
79        if last <= s:
80          last = s+1
81        else:
82          monotonic = True
83    data.sort()
84    r = RangeSet(cls._remove_pairs(data))
85    r.monotonic = monotonic
86    return r
87
88  @staticmethod
89  def _remove_pairs(source):
90    last = None
91    for i in source:
92      if i == last:
93        last = None
94      else:
95        if last is not None:
96          yield last
97        last = i
98    if last is not None:
99      yield last
100
101  def to_string(self):
102    out = []
103    for i in range(0, len(self.data), 2):
104      s, e = self.data[i:i+2]
105      if e == s+1:
106        out.append(str(s))
107      else:
108        out.append(str(s) + "-" + str(e-1))
109    return " ".join(out)
110
111  def to_string_raw(self):
112    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
113
114  def union(self, other):
115    """Return a new RangeSet representing the union of this RangeSet
116    with the argument."""
117    out = []
118    z = 0
119    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
120                            zip(other.data, itertools.cycle((+1, -1)))):
121      if (z == 0 and d == 1) or (z == 1 and d == -1):
122        out.append(p)
123      z += d
124    return RangeSet(data=out)
125
126  def intersect(self, other):
127    """Return a new RangeSet representing the intersection of this
128    RangeSet with the argument."""
129    out = []
130    z = 0
131    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
132                            zip(other.data, itertools.cycle((+1, -1)))):
133      if (z == 1 and d == 1) or (z == 2 and d == -1):
134        out.append(p)
135      z += d
136    return RangeSet(data=out)
137
138  def subtract(self, other):
139    """Return a new RangeSet representing subtracting the argument
140    from this RangeSet."""
141
142    out = []
143    z = 0
144    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
145                            zip(other.data, itertools.cycle((-1, +1)))):
146      if (z == 0 and d == 1) or (z == 1 and d == -1):
147        out.append(p)
148      z += d
149    return RangeSet(data=out)
150
151  def overlaps(self, other):
152    """Returns true if the argument has a nonempty overlap with this
153    RangeSet."""
154
155    # This is like intersect, but we can stop as soon as we discover the
156    # output is going to be nonempty.
157    z = 0
158    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
159                            zip(other.data, itertools.cycle((+1, -1)))):
160      if (z == 1 and d == 1) or (z == 2 and d == -1):
161        return True
162      z += d
163    return False
164
165  def size(self):
166    """Returns the total size of the RangeSet (ie, how many integers
167    are in the set)."""
168
169    total = 0
170    for i, p in enumerate(self.data):
171      if i % 2:
172        total += p
173      else:
174        total -= p
175    return total
176