# sparse_img.py revision fc44a515d46e6f4d5eaa0d32659b1cf3b9492305
import bisect
import os
import pprint
import struct
import sys
from hashlib import sha1

# Only RangeSet is used from rangelib; import it explicitly instead of
# wildcard-importing so this module's namespace stays predictable.
from rangelib import RangeSet


class SparseImage(object):
    """Wraps a sparse image file (and optional file map) into an image
    object suitable for passing to BlockImageDiff."""

    def __init__(self, simg_fn, file_map_fn=None):
        """Open and index a sparse image.

        Parses the 28-byte sparse-image header, then walks every chunk
        header to build:
          - care_map: RangeSet of output blocks that carry real data,
          - offset_map / offset_index: where each raw chunk's payload
            lives in the input file, so reads can seek straight to it.

        Args:
          simg_fn: path to the sparse image; the file is kept open for
              the lifetime of this object (reads are done lazily).
          file_map_fn: optional block-map file; when given it is loaded
              via LoadFileBlockMap, otherwise the entire care map is
              exposed as a single "__DATA" entry.

        Raises:
          ValueError: on bad magic/version/header sizes, or on fill,
              CRC32, or unknown chunk types (only raw and don't-care
              chunks are supported).
        """
        self.simg_f = f = open(simg_fn, "rb")

        header_bin = f.read(28)
        header = struct.unpack("<I4H4I", header_bin)

        magic = header[0]
        major_version = header[1]
        minor_version = header[2]
        file_hdr_sz = header[3]
        chunk_hdr_sz = header[4]
        self.blocksize = blk_sz = header[5]
        self.total_blocks = total_blks = header[6]
        total_chunks = header[7]
        image_checksum = header[8]  # CRC field from the header; not verified here

        if magic != 0xED26FF3A:
            raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
        if major_version != 1 or minor_version != 0:
            raise ValueError("I know about version 1.0, but this is version %u.%u" %
                             (major_version, minor_version))
        if file_hdr_sz != 28:
            raise ValueError("File header size was expected to be 28, but is %u." %
                             (file_hdr_sz,))
        if chunk_hdr_sz != 12:
            raise ValueError("Chunk header size was expected to be 12, but is %u." %
                             (chunk_hdr_sz,))

        print("Total of %u %u-byte output blocks in %u input chunks."
              % (total_blks, blk_sz, total_chunks))

        pos = 0  # in blocks
        care_data = []
        self.offset_map = offset_map = []

        for i in range(total_chunks):
            header_bin = f.read(12)
            header = struct.unpack("<2H2I", header_bin)
            chunk_type = header[0]
            # header[1] is a reserved field; ignored.
            chunk_sz = header[2]
            total_sz = header[3]
            data_sz = total_sz - 12  # payload bytes that follow this chunk header

            if chunk_type == 0xCAC1:  # raw chunk
                if data_sz != (chunk_sz * blk_sz):
                    raise ValueError(
                        "Raw chunk input size (%u) does not match output size (%u)" %
                        (data_sz, chunk_sz * blk_sz))
                else:
                    care_data.append(pos)
                    care_data.append(pos + chunk_sz)
                    # Record (first output block, length in blocks, file
                    # offset of payload) so _GetRangeData can seek to it.
                    offset_map.append((pos, chunk_sz, f.tell()))
                    pos += chunk_sz
                    f.seek(data_sz, os.SEEK_CUR)

            elif chunk_type == 0xCAC2:  # fill chunk
                raise ValueError("Fill chunks are not supported")

            elif chunk_type == 0xCAC3:  # don't-care chunk
                if data_sz != 0:
                    raise ValueError("Don't care chunk input size is non-zero (%u)" %
                                     (data_sz))
                else:
                    pos += chunk_sz

            elif chunk_type == 0xCAC4:  # CRC32 chunk
                raise ValueError("CRC32 chunks are not supported")

            else:
                raise ValueError("Unknown chunk type 0x%04X not supported" %
                                 (chunk_type,))

        self.care_map = RangeSet(care_data)
        # Sorted list of each chunk's starting block, for bisecting into
        # offset_map when resolving a block number to a file position.
        self.offset_index = [i[0] for i in offset_map]

        if file_map_fn:
            self.LoadFileBlockMap(file_map_fn)
        else:
            self.file_map = {"__DATA": self.care_map}

    def ReadRangeSet(self, ranges):
        """Return a list of data chunks covering the given RangeSet."""
        return list(self._GetRangeData(ranges))

    def TotalSha1(self):
        """Return the SHA-1 hash of all data in the 'care' regions of this image."""
        h = sha1()
        for d in self._GetRangeData(self.care_map):
            h.update(d)
        return h.hexdigest()

    def _GetRangeData(self, ranges):
        """Generator that produces all the image data in 'ranges'.  The
        number of individual pieces returned is arbitrary (and in
        particular is not necessarily equal to the number of ranges in
        'ranges'.

        This generator is stateful -- it depends on the open file object
        contained in this SparseImage, so you should not try to run two
        instances of this generator on the same object simultaneously."""

        f = self.simg_f
        for s, e in ranges:
            to_read = e - s
            # Find the chunk containing block s: the last chunk whose
            # starting block is <= s.
            idx = bisect.bisect_right(self.offset_index, s) - 1
            chunk_start, chunk_len, filepos = self.offset_map[idx]

            # For the first chunk we may be starting partway through it.
            p = filepos + ((s - chunk_start) * self.blocksize)
            remain = chunk_len - (s - chunk_start)

            f.seek(p, os.SEEK_SET)
            this_read = min(remain, to_read)
            yield f.read(this_read * self.blocksize)
            to_read -= this_read

            while to_read > 0:
                # Continue with following chunks if this range spans
                # multiple chunks.
                idx += 1
                chunk_start, chunk_len, filepos = self.offset_map[idx]
                f.seek(filepos, os.SEEK_SET)
                this_read = min(chunk_len, to_read)
                yield f.read(this_read * self.blocksize)
                to_read -= this_read

    def LoadFileBlockMap(self, fn):
        """Load a block map file and build self.file_map from it.

        Each line of the map file is "<filename> <rangeset>".  Every
        mapped range must fall inside the care map.  Care-map blocks not
        claimed by any file are split into "__ZERO" (all-zero blocks)
        and "__NONZERO" entries, because zero blocks are common and
        bsdiff handles long runs of repeated bytes especially poorly.

        Raises:
          AssertionError: if a file's ranges are not contained in the
              care map.
        """
        remaining = self.care_map
        self.file_map = out = {}

        with open(fn) as f:
            for line in f:
                fname, ranges = line.split(None, 1)
                ranges = RangeSet.parse(ranges)
                out[fname] = ranges
                # Explicit check instead of a bare assert so it survives
                # python -O.
                if ranges.size() != ranges.intersect(remaining).size():
                    raise AssertionError(
                        "file %s claims blocks outside the care map" % (fname,))
                remaining = remaining.subtract(ranges)

        # For all the remaining blocks in the care_map (ie, those that
        # aren't part of the data for any file), divide them into blocks
        # that are all zero and blocks that aren't.

        zero_blocks = []
        nonzero_blocks = []
        # The image file is opened in binary mode, so reads return bytes;
        # the reference block must be bytes too or the comparison below
        # would never match under Python 3.
        reference = b'\0' * self.blocksize

        f = self.simg_f
        for s, e in remaining:
            for b in range(s, e):
                idx = bisect.bisect_right(self.offset_index, b) - 1
                chunk_start, chunk_len, filepos = self.offset_map[idx]
                filepos += (b - chunk_start) * self.blocksize
                f.seek(filepos, os.SEEK_SET)
                data = f.read(self.blocksize)

                # Append [start, end) pairs; RangeSet(data=...) accepts
                # this flat begin/end list form.
                if data == reference:
                    zero_blocks.append(b)
                    zero_blocks.append(b + 1)
                else:
                    nonzero_blocks.append(b)
                    nonzero_blocks.append(b + 1)

        out["__ZERO"] = RangeSet(data=zero_blocks)
        out["__NONZERO"] = RangeSet(data=nonzero_blocks)

    def ResetFileMap(self):
        """Throw away the file map and treat the entire image as
        undifferentiated data."""
        self.file_map = {"__DATA": self.care_map}