sparse_img.py revision 41bcf3e425b9a1792b65b16b1c115b018348bde7
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bisect
import os
import struct
from hashlib import sha1

import rangelib


class SparseImage(object):
  """Wraps a sparse image file (and optional file map) into an image
  object suitable for passing to BlockImageDiff."""

  def __init__(self, simg_fn, file_map_fn=None):
    self.simg_f = f = open(simg_fn, "rb")

    header_bin = f.read(28)
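    # The sparse image file header is 28 bytes: magic, major/minor version,
    # file header size, chunk header size, block size, total output blocks,
    # total chunks, and a final checksum field that this parser ignores.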
    header = struct.unpack("<I4H4I", header_bin)

    magic = header[0]
    major_version = header[1]
    minor_version = header[2]
    file_hdr_sz = header[3]
    chunk_hdr_sz = header[4]
    self.blocksize = blk_sz = header[5]
    self.total_blocks = total_blks = header[6]
    total_chunks = header[7]

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    print("Total of %u %u-byte output blocks in %u input chunks."
          % (total_blks, blk_sz, total_chunks))

    pos = 0   # in blocks
    care_data = []
    self.offset_map = offset_map = []

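    # Each chunk starts with a 12-byte header packed as "<2H2I": chunk type,
    # a reserved field, the chunk's size in output blocks, and its total size
    # in bytes (header plus payload).  offset_map records, for every chunk
    # that carries data, a (start_block, num_blocks, file_offset, fill_data)
    # tuple; exactly one of file_offset / fill_data is set.  Chunk types:
    #   0xCAC1  raw: payload is chunk_sz blocks of literal data
    #   0xCAC2  fill: payload is a 4-byte pattern repeated to fill each block
    #   0xCAC3  don't care: no payload, output blocks are skipped
    #   0xCAC4  CRC32: not supported here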
    for i in range(total_chunks):
      header_bin = f.read(12)
      header = struct.unpack("<2H2I", header_bin)
      chunk_type = header[0]
      chunk_sz = header[2]
      total_sz = header[3]
      data_sz = total_sz - 12

      if chunk_type == 0xCAC1:
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        else:
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          offset_map.append((pos, chunk_sz, f.tell(), None))
          pos += chunk_sz
          f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:
        fill_data = f.read(4)
        care_data.append(pos)
        care_data.append(pos + chunk_sz)
        offset_map.append((pos, chunk_sz, None, fill_data))
        pos += chunk_sz

      elif chunk_type == 0xCAC3:
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz,))
        else:
          pos += chunk_sz

      elif chunk_type == 0xCAC4:
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    self.care_map = rangelib.RangeSet(care_data)
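    # offset_index holds each chunk's starting block, in ascending order, so
    # bisect can map an output block number back to its entry in offset_map.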
    self.offset_index = [i[0] for i in offset_map]

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn)
    else:
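      # Without a block map, treat the whole care region as a single
      # undifferentiated blob of data.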
      self.file_map = {"__DATA": self.care_map}

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self):
    """Return the SHA-1 hash of all data in the 'care' regions of this image."""
    h = sha1()
    for d in self._GetRangeData(self.care_map):
      h.update(d)
    return h.hexdigest()

  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'.  The
    number of individual pieces returned is arbitrary (and in
    particular is not necessarily equal to the number of ranges in
    'ranges').

    This generator is stateful -- it depends on the open file object
    contained in this SparseImage, so you should not try to run two
    instances of this generator on the same object simultaneously."""

    f = self.simg_f
    for s, e in ranges:
      to_read = e-s
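      # Find the chunk containing block s: bisect_right returns the position
      # just past the last chunk whose start block is <= s.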
      idx = bisect.bisect_right(self.offset_index, s) - 1
      chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]

      # For the first chunk, we may be starting partway through it.
      remain = chunk_len - (s - chunk_start)
      this_read = min(remain, to_read)
      if filepos is not None:
        p = filepos + ((s - chunk_start) * self.blocksize)
        f.seek(p, os.SEEK_SET)
        yield f.read(this_read * self.blocksize)
      else:
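        # A fill chunk stores only a 4-byte pattern, so repeat it
        # blocksize/4 times per block to reconstruct the data.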
        yield fill_data * (this_read * (self.blocksize >> 2))
      to_read -= this_read

      while to_read > 0:
        # Continue with following chunks if this range spans multiple chunks.
        idx += 1
        chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
        this_read = min(chunk_len, to_read)
        if filepos is not None:
          f.seek(filepos, os.SEEK_SET)
          yield f.read(this_read * self.blocksize)
        else:
          yield fill_data * (this_read * (self.blocksize >> 2))
        to_read -= this_read

  def LoadFileBlockMap(self, fn):
    remaining = self.care_map
    self.file_map = out = {}

    with open(fn) as f:
      for line in f:
        fn, ranges = line.split(None, 1)
        ranges = rangelib.RangeSet.parse(ranges)
        out[fn] = ranges
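        # Every block this file claims must still be unclaimed, i.e. the
        # ranges in the map file must lie inside the care map and must not
        # overlap each other.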
        assert ranges.size() == ranges.intersect(remaining).size()
        remaining = remaining.subtract(ranges)

    # For all the remaining blocks in the care_map (i.e., those that
    # aren't part of the data for any file), divide them into blocks
    # that are all zero and blocks that aren't.  (Zero blocks are
    # handled specially because (1) there are usually a lot of them
    # and (2) bsdiff handles files with long sequences of repeated
    # bytes especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
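        # Locate the chunk containing block b, then read the block from the
        # file or synthesize it from the chunk's fill pattern.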
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, _, filepos, fill_data = self.offset_map[idx]
        if filepos is not None:
          filepos += (b-chunk_start) * self.blocksize
          f.seek(filepos, os.SEEK_SET)
          data = f.read(self.blocksize)
        else:
          if fill_data == reference[:4]:   # fill with all zeros
            data = reference
          else:
            data = None

        if data == reference:
          zero_blocks.append(b)
          zero_blocks.append(b+1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b+1)

    assert zero_blocks or nonzero_blocks

    if zero_blocks:
      out["__ZERO"] = rangelib.RangeSet(data=zero_blocks)
    if nonzero_blocks:
      out["__NONZERO"] = rangelib.RangeSet(data=nonzero_blocks)

  def ResetFileMap(self):
    """Throw away the file map and treat the entire image as
    undifferentiated data."""
    self.file_map = {"__DATA": self.care_map}

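
# Minimal usage sketch: build a SparseImage from an image path given on the
# command line (with an optional block-map file as the second argument) and
# print a few of the attributes computed above.
if __name__ == "__main__":
  import sys

  image = SparseImage(sys.argv[1], sys.argv[2] if len(sys.argv) > 2 else None)
  print("block size: %u bytes" % (image.blocksize,))
  print("total output blocks: %u" % (image.total_blocks,))
  print("file map entries: %d" % (len(image.file_map),))
  print("SHA-1 over care regions: %s" % (image.TotalSha1(),))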