# sparse_img.py revision 007979ee7543a396d97b3e9ada21aca44d503597
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bisect
import os
import sys
import struct
import pprint
from hashlib import sha1

from rangelib import *


class SparseImage(object):
    """Wraps a sparse image file into an image object.

    Wraps a sparse image file (and optional file map and clobbered_blocks)
    into an image object suitable for passing to BlockImageDiff. file_map
    contains the mapping between files and their blocks. clobbered_blocks
    contains the set of blocks that should be always written to the target
    regardless of the old contents (i.e. copying instead of patching).
    clobbered_blocks should be in the form of a string like "0" or
    "0 1-5 8".

    Attributes set by the constructor:
      simg_f: the sparse image, opened in binary mode; kept open because
          _GetRangeData() reads block data from it on demand.
      blocksize: output block size in bytes, taken from the file header.
      total_blocks: total number of output blocks, from the file header.
      offset_map: one (start_block, num_blocks, file_offset, fill_data)
          tuple per raw or fill chunk. file_offset is None for fill
          chunks; fill_data is None for raw chunks.
      offset_index: the start_block of every offset_map entry, in order,
          used for bisect lookups.
      care_map: RangeSet of the blocks covered by raw and fill chunks.
      clobbered_blocks: RangeSet built from the clobbered_blocks argument.
      file_map: dict mapping a name to the RangeSet of its blocks.
    """

    # Constants of the sparse image format accepted by this parser.
    _MAGIC = 0xED26FF3A
    _FILE_HEADER_SIZE = 28
    _CHUNK_HEADER_SIZE = 12
    _CHUNK_RAW = 0xCAC1
    _CHUNK_FILL = 0xCAC2
    _CHUNK_DONT_CARE = 0xCAC3
    _CHUNK_CRC32 = 0xCAC4

    def __init__(self, simg_fn, file_map_fn=None, clobbered_blocks=None):
        """Open and index the sparse image at simg_fn.

        Args:
          simg_fn: path of the sparse image file.
          file_map_fn: optional path of a block map file; when given it is
              loaded with LoadFileBlockMap(), otherwise the whole care map
              is exposed as the single entry "__DATA".
          clobbered_blocks: optional range description handed to RangeSet.

        Raises:
          ValueError: if the header or any chunk is malformed or uses an
              unsupported feature.
        """
        self.simg_f = f = open(simg_fn, "rb")
        try:
            total_chunks = self._ReadFileHeader(f)
            self.clobbered_blocks = RangeSet(data=clobbered_blocks)
            self._ReadChunkTable(f, total_chunks)

            if file_map_fn:
                self.LoadFileBlockMap(file_map_fn, self.clobbered_blocks)
            else:
                self.file_map = {"__DATA": self.care_map}
        except BaseException:
            # Construction failed, so nobody will ever get a reference to
            # this object; close the handle instead of leaking it.
            f.close()
            raise

    def _ReadFileHeader(self, f):
        """Parse and validate the file header; return the chunk count.

        Sets self.blocksize and self.total_blocks as a side effect.
        """
        (magic, major_version, minor_version, file_hdr_sz, chunk_hdr_sz,
         blk_sz, total_blks, total_chunks, _image_checksum) = struct.unpack(
             "<I4H4I", f.read(self._FILE_HEADER_SIZE))

        if magic != self._MAGIC:
            raise ValueError(
                "Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
        if major_version != 1 or minor_version != 0:
            raise ValueError(
                "I know about version 1.0, but this is version %u.%u" %
                (major_version, minor_version))
        if file_hdr_sz != self._FILE_HEADER_SIZE:
            raise ValueError(
                "File header size was expected to be 28, but is %u." %
                (file_hdr_sz,))
        if chunk_hdr_sz != self._CHUNK_HEADER_SIZE:
            raise ValueError(
                "Chunk header size was expected to be 12, but is %u." %
                (chunk_hdr_sz,))

        self.blocksize = blk_sz
        self.total_blocks = total_blks

        print("Total of %u %u-byte output blocks in %u input chunks."
              % (total_blks, blk_sz, total_chunks))
        return total_chunks

    def _ReadChunkTable(self, f, total_chunks):
        """Walk all chunk headers and build the block -> data index.

        Sets self.offset_map, self.offset_index and self.care_map. Raw
        chunk payloads are skipped, not read; only their file offset is
        recorded.
        """
        blk_sz = self.blocksize
        pos = 0  # current output position, in blocks
        care_data = []  # flat list of [start, end) block pairs
        self.offset_map = offset_map = []

        for _ in range(total_chunks):
            chunk_type, _reserved, chunk_sz, total_sz = struct.unpack(
                "<2H2I", f.read(self._CHUNK_HEADER_SIZE))
            data_sz = total_sz - self._CHUNK_HEADER_SIZE

            if chunk_type == self._CHUNK_RAW:
                if data_sz != chunk_sz * blk_sz:
                    raise ValueError(
                        "Raw chunk input size (%u) does not match output "
                        "size (%u)" % (data_sz, chunk_sz * blk_sz))
                care_data.extend((pos, pos + chunk_sz))
                offset_map.append((pos, chunk_sz, f.tell(), None))
                f.seek(data_sz, os.SEEK_CUR)

            elif chunk_type == self._CHUNK_FILL:
                # Exactly 4 bytes are consumed below; any other payload
                # size would leave the parser misaligned with the next
                # chunk header.
                if data_sz != 4:
                    raise ValueError(
                        "Fill chunk input size should be 4, but is %u" %
                        (data_sz,))
                fill_data = f.read(4)
                care_data.extend((pos, pos + chunk_sz))
                offset_map.append((pos, chunk_sz, None, fill_data))

            elif chunk_type == self._CHUNK_DONT_CARE:
                if data_sz != 0:
                    raise ValueError(
                        "Don't care chunk input size is non-zero (%u)" %
                        (data_sz,))

            elif chunk_type == self._CHUNK_CRC32:
                raise ValueError("CRC32 chunks are not supported")

            else:
                raise ValueError("Unknown chunk type 0x%04X not supported" %
                                 (chunk_type,))

            # Every branch that gets here advances the output position.
            pos += chunk_sz

        self.care_map = RangeSet(care_data)
        self.offset_index = [entry[0] for entry in offset_map]

    def ReadRangeSet(self, ranges):
        """Return the image data for 'ranges' as a list of byte strings."""
        return list(self._GetRangeData(ranges))

    def TotalSha1(self):
        """Return the SHA-1 hash of all data in the 'care' regions but not in
        clobbered_blocks of this image."""
        h = sha1()
        care_not_clobbered = self.care_map.subtract(self.clobbered_blocks)
        for d in self._GetRangeData(care_not_clobbered):
            h.update(d)
        return h.hexdigest()

    def _GetRangeData(self, ranges):
        """Generator that produces all the image data in 'ranges'.  The
        number of individual pieces returned is arbitrary (and in
        particular is not necessarily equal to the number of ranges in
        'ranges').

        'ranges' is iterated as (start, end) block pairs. The blocks are
        looked up in offset_map without validation, so callers are
        expected to pass only blocks that are part of care_map.

        This generator is stateful -- it depends on the open file object
        contained in this SparseImage, so you should not try to run two
        instances of this generator on the same object simultaneously."""

        f = self.simg_f
        blocksize = self.blocksize
        for s, e in ranges:
            to_read = e - s
            idx = bisect.bisect_right(self.offset_index, s) - 1
            # Only the first chunk of a range can be entered partway
            # through; every following chunk is read from its start.
            skip = s - self.offset_map[idx][0]

            while True:
                _, chunk_len, filepos, fill_data = self.offset_map[idx]
                this_read = min(chunk_len - skip, to_read)
                if filepos is not None:
                    f.seek(filepos + skip * blocksize, os.SEEK_SET)
                    yield f.read(this_read * blocksize)
                else:
                    # fill_data is 4 bytes, hence blocksize / 4 repeats
                    # per block.
                    yield fill_data * (this_read * (blocksize >> 2))
                to_read -= this_read
                if to_read <= 0:
                    break
                # The range spans multiple chunks; continue with the next.
                idx += 1
                skip = 0

    def LoadFileBlockMap(self, fn, clobbered_blocks):
        """Populate self.file_map from the block map file 'fn'.

        Each line of the file is "<name> <ranges>", where <ranges> is
        parsed by RangeSet.parse. Care-map blocks that belong neither to
        a listed file nor to clobbered_blocks are split into the
        synthetic entries "__ZERO" and "__NONZERO"; clobbered_blocks
        itself is stored as "__COPY".

        Args:
          fn: path of the block map file.
          clobbered_blocks: RangeSet of blocks that must not belong to
              any file.
        """
        remaining = self.care_map
        self.file_map = out = {}

        with open(fn) as map_file:
            for line in map_file:
                name, ranges = line.split(None, 1)
                ranges = RangeSet.parse(ranges)
                out[name] = ranges
                # Every block of a file must still be unclaimed care data.
                assert ranges.size() == ranges.intersect(remaining).size()

                # Currently we assume that blocks in clobbered_blocks are
                # not part of any file.
                assert not clobbered_blocks.overlaps(ranges)
                remaining = remaining.subtract(ranges)

        remaining = remaining.subtract(clobbered_blocks)

        # For all the remaining blocks in the care_map (ie, those that
        # aren't part of the data for any file nor part of the
        # clobbered_blocks), divide them into blocks that are all zero and
        # blocks that aren't.  (Zero blocks are handled specially because
        # (1) there are usually a lot of them and (2) bsdiff handles files
        # with long sequences of repeated bytes especially poorly.)

        zero_blocks = []
        nonzero_blocks = []
        # The image is opened in binary mode, so the reference must be a
        # byte string: on Python 3 a str never compares equal to bytes and
        # no block would ever be recognised as zero.
        reference = b'\0' * self.blocksize
        zero_fill = reference[:4]

        simg = self.simg_f
        for s, e in remaining:
            for b in range(s, e):
                idx = bisect.bisect_right(self.offset_index, b) - 1
                chunk_start, _, filepos, fill_data = self.offset_map[idx]
                if filepos is not None:
                    simg.seek(filepos + (b - chunk_start) * self.blocksize,
                              os.SEEK_SET)
                    is_zero = simg.read(self.blocksize) == reference
                else:
                    # A fill chunk is all zeros iff its fill pattern is.
                    is_zero = fill_data == zero_fill

                target = zero_blocks if is_zero else nonzero_blocks
                target.append(b)
                target.append(b + 1)

        out["__ZERO"] = RangeSet(data=zero_blocks)
        out["__NONZERO"] = RangeSet(data=nonzero_blocks)
        out["__COPY"] = clobbered_blocks

    def ResetFileMap(self):
        """Throw away the file map and treat the entire image as
        undifferentiated data."""
        self.file_map = {"__DATA": self.care_map}