# sparse_img.py revision fc44a515d46e6f4d5eaa0d32659b1cf3b9492305
1import bisect
2import os
3import sys
4import struct
5import pprint
6from hashlib import sha1
7
8from rangelib import *
9
class SparseImage(object):
  """Wraps a sparse image file (and optional file map) into an image
  object suitable for passing to BlockImageDiff."""

  def __init__(self, simg_fn, file_map_fn=None):
    """Open a sparse image and index its raw-data chunks.

    Args:
      simg_fn: path of the sparse image file; it is opened in binary
        mode and kept open for the lifetime of this object.
      file_map_fn: optional path of a block-map file (one "<name>
        <ranges>" entry per line).  When given it is loaded via
        LoadFileBlockMap; otherwise the whole care map is exposed under
        the single key "__DATA".

    Raises:
      ValueError: on a malformed header, or on a fill (0xCAC2), CRC32
        (0xCAC4), or unknown chunk type, none of which this parser
        supports.
    """
    self.simg_f = f = open(simg_fn, "rb")

    # Sparse image file header: 28 bytes, little-endian --
    # magic, major/minor version, header sizes, block size, block and
    # chunk totals, and an (unverified) image checksum.
    header_bin = f.read(28)
    header = struct.unpack("<I4H4I", header_bin)

    magic = header[0]
    major_version = header[1]
    minor_version = header[2]
    file_hdr_sz = header[3]
    chunk_hdr_sz = header[4]
    self.blocksize = blk_sz = header[5]
    self.total_blocks = total_blks = header[6]
    total_chunks = header[7]
    image_checksum = header[8]  # present in the header but not checked here

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    print("Total of %u %u-byte output blocks in %u input chunks."
          % (total_blks, blk_sz, total_chunks))

    pos = 0   # in blocks
    care_data = []
    # (start block, length in blocks, file offset of data) per raw chunk,
    # in ascending block order, so block->file-position lookups can be
    # done with bisect against offset_index below.
    self.offset_map = offset_map = []

    for i in range(total_chunks):
      # Chunk header: 12 bytes, little-endian.
      header_bin = f.read(12)
      header = struct.unpack("<2H2I", header_bin)
      chunk_type = header[0]
      reserved1 = header[1]     # unused by the format
      chunk_sz = header[2]      # chunk size in output blocks
      total_sz = header[3]      # chunk header + data, in bytes
      data_sz = total_sz - 12

      if chunk_type == 0xCAC1:    # raw data chunk
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        else:
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          offset_map.append((pos, chunk_sz, f.tell()))
          pos += chunk_sz
          # Skip over the payload; it is read lazily by _GetRangeData.
          f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:  # fill chunk
        raise ValueError("Fill chunks are not supported")

      elif chunk_type == 0xCAC3:  # "don't care" chunk: no payload
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz,))
        else:
          pos += chunk_sz

      elif chunk_type == 0xCAC4:  # CRC32 chunk
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    self.care_map = RangeSet(care_data)
    # Parallel list of chunk start blocks, used to bisect into offset_map.
    self.offset_index = [i[0] for i in offset_map]

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn)
    else:
      self.file_map = {"__DATA": self.care_map}

  def ReadRangeSet(self, ranges):
    """Return the data in 'ranges' as a list of bytes pieces (the number
    and size of the pieces is arbitrary; only their concatenation is
    meaningful)."""
    return [d for d in self._GetRangeData(ranges)]

  def TotalSha1(self):
    """Return the SHA-1 hash of all data in the 'care' regions of this image."""
    h = sha1()
    for d in self._GetRangeData(self.care_map):
      h.update(d)
    return h.hexdigest()

  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'.  The
    number of individual pieces returned is arbitrary (and in
    particular is not necessarily equal to the number of ranges in
    'ranges'.

    This generator is stateful -- it depends on the open file object
    contained in this SparseImage, so you should not try to run two
    instances of this generator on the same object simultaneously."""

    f = self.simg_f
    for s, e in ranges:
      to_read = e-s
      # Find the raw chunk containing block s: the last chunk whose
      # start block is <= s.
      idx = bisect.bisect_right(self.offset_index, s) - 1
      chunk_start, chunk_len, filepos = self.offset_map[idx]

      # for the first chunk we may be starting partway through it.
      p = filepos + ((s - chunk_start) * self.blocksize)
      remain = chunk_len - (s - chunk_start)

      f.seek(p, os.SEEK_SET)
      this_read = min(remain, to_read)
      yield f.read(this_read * self.blocksize)
      to_read -= this_read

      while to_read > 0:
        # continue with following chunks if this range spans multiple chunks.
        idx += 1
        chunk_start, chunk_len, filepos = self.offset_map[idx]
        f.seek(filepos, os.SEEK_SET)
        this_read = min(chunk_len, to_read)
        yield f.read(this_read * self.blocksize)
        to_read -= this_read

  def LoadFileBlockMap(self, fn):
    """Populate self.file_map from the block-map file 'fn', assigning
    leftover care-map blocks to the synthetic "__ZERO" and "__NONZERO"
    entries."""
    remaining = self.care_map
    self.file_map = out = {}

    with open(fn) as f:
      for line in f:
        fn, ranges = line.split(None, 1)
        ranges = RangeSet.parse(ranges)
        out[fn] = ranges
        # Every file's blocks must lie inside the not-yet-claimed part
        # of the care map.  NOTE: assert is stripped under -O.
        assert ranges.size() == ranges.intersect(remaining).size()
        remaining = remaining.subtract(ranges)

    # For all the remaining blocks in the care_map (ie, those that
    # aren't part of the data for any file), divide them into blocks
    # that are all zero and blocks that aren't.  (Zero blocks are
    # handled specially because (1) there are usually a lot of them
    # and (2) bsdiff handles files with long sequences of repeated
    # bytes especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    # The image was opened in binary mode, so reads return bytes; the
    # reference block must be a bytes literal (a str would never compare
    # equal under Python 3, silently disabling zero-block detection).
    reference = b'\0' * self.blocksize

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
        # Locate and read the single block b from the backing file.
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, chunk_len, filepos = self.offset_map[idx]
        filepos += (b-chunk_start) * self.blocksize
        f.seek(filepos, os.SEEK_SET)
        data = f.read(self.blocksize)

        if data == reference:
          zero_blocks.append(b)
          zero_blocks.append(b+1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b+1)

    out["__ZERO"] = RangeSet(data=zero_blocks)
    out["__NONZERO"] = RangeSet(data=nonzero_blocks)

  def ResetFileMap(self):
    """Throw away the file map and treat the entire image as
    undifferentiated data."""
    self.file_map = {"__DATA": self.care_map}
185