"""Macintosh binhex compression/decompression.

easy interface:
binhex(inputfilename, outputfilename)
hexbin(inputfilename, outputfilename)
"""

#
# Jack Jansen, CWI, August 1995.
#
# The module is supposed to be as compatible as possible. Especially the
# easy interface should work "as expected" on any platform.
# XXXX Note: currently, textfiles appear in mac-form on all platforms.
# We seem to lack a simple character-translate in python.
# (we should probably use ISO-Latin-1 on all but the mac platform).
# XXXX The simple routines are too simple: they expect to hold the complete
# files in-core. Should be fixed.
# XXXX It would be nice to handle AppleDouble format on unix
# (for servers serving macs).
# XXXX I don't understand what happens when you get 0x90 times the same byte on
# input. The resulting code (xx 90 90) would appear to be interpreted as an
# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety...
#
import sys
import os
import struct
import binascii

__all__ = ["binhex","hexbin","Error"]


class Error(Exception):
    """Raised for malformed binhex input and for API misuse."""
    pass


# States (what have we written)
[_DID_HEADER, _DID_DATA, _DID_RSRC] = range(3)

# Various constants
REASONABLY_LARGE = 32768    # Minimal amount we pass the rle-coder
LINELEN = 64
RUNCHAR = chr(0x90)         # run-length introducer

#
# This code is no longer byte-order dependent

#
# Workarounds for non-mac machines.
try:
    from Carbon.File import FSSpec, FInfo
    from MacOS import openrf

    def getfileinfo(name):
        """Return (basename, finder info, data fork size, resource fork size)."""
        finfo = FSSpec(name).FSpGetFInfo()
        dir, file = os.path.split(name)
        # XXX Get resource/data sizes
        fp = open(name, 'rb')
        try:
            fp.seek(0, 2)
            dlen = fp.tell()
        finally:
            fp.close()
        fp = openrf(name, '*rb')
        try:
            fp.seek(0, 2)
            rlen = fp.tell()
        finally:
            fp.close()
        return file, finfo, dlen, rlen

    def openrsrc(name, *mode):
        """Open the resource fork of *name*; mode defaults to reading."""
        if not mode:
            mode = '*rb'
        else:
            mode = '*' + mode[0]
        return openrf(name, mode)

except ImportError:
    #
    # Glue code for non-macintosh usage
    #

    class FInfo:
        """Minimal stand-in for the Finder info record."""

        def __init__(self):
            self.Type = '????'
            self.Creator = '????'
            self.Flags = 0

    def getfileinfo(name):
        """Return (basename, FInfo, data size, 0) for *name*.

        The file type is set to 'TEXT' when the first 256 bytes contain
        only ASCII printable characters and ASCII whitespace.  The
        resource fork size is always reported as 0 here.
        """
        finfo = FInfo()
        # Binary mode, single handle: the size and the sniffed bytes are
        # then independent of platform newline/EOF translation.
        fp = open(name, 'rb')
        try:
            # Quick check for textfile
            data = fp.read(256)
            for byte in bytearray(data):
                is_space = byte == 0x20 or 0x09 <= byte <= 0x0d
                if not is_space and (byte < 0x20 or byte > 0x7f):
                    break
            else:
                finfo.Type = 'TEXT'
            fp.seek(0, 2)
            dsize = fp.tell()
        finally:
            fp.close()
        dir, file = os.path.split(name)
        file = file.replace(':', '-', 1)
        return file, finfo, dsize, 0

    class openrsrc:
        """Dummy resource fork: reads as empty, discards writes."""

        def __init__(self, *args):
            pass

        def read(self, *args):
            return ''

        def write(self, *args):
            pass

        def close(self):
            pass


class _Hqxcoderengine:
    """Write data to the coder in 3-byte chunks"""

    def __init__(self, ofp):
        self.ofp = ofp
        self.data = ''
        self.hqxdata = ''
        # The first output line is one character shorter; presumably to
        # leave room for the ':' that BinHex.__init__ writes before us.
        self.linelen = LINELEN - 1

    def write(self, data):
        """Buffer *data*, encoding every complete 3-byte group."""
        self.data = self.data + data
        datalen = len(self.data)
        todo = (datalen // 3) * 3
        data = self.data[:todo]
        self.data = self.data[todo:]
        if not data:
            return
        self.hqxdata = self.hqxdata + binascii.b2a_hqx(data)
        self._flush(0)

    def _flush(self, force):
        """Emit all complete lines; with *force* also the tail plus ':'."""
        first = 0
        while first <= len(self.hqxdata) - self.linelen:
            last = first + self.linelen
            self.ofp.write(self.hqxdata[first:last] + '\n')
            self.linelen = LINELEN
            first = last
        self.hqxdata = self.hqxdata[first:]
        if force:
            self.ofp.write(self.hqxdata + ':\n')

    def close(self):
        """Encode any leftover bytes, write the trailer and close the file."""
        if self.data:
            self.hqxdata = self.hqxdata + binascii.b2a_hqx(self.data)
        self._flush(1)
        self.ofp.close()
        del self.ofp


class _Rlecoderengine:
    """Write data to the RLE-coder in suitably large chunks"""

    def __init__(self, ofp):
        self.ofp = ofp
        self.data = ''

    def write(self, data):
        """Buffer *data*; RLE-encode once REASONABLY_LARGE bytes are held."""
        self.data = self.data + data
        if len(self.data) < REASONABLY_LARGE:
            return
        rledata = binascii.rlecode_hqx(self.data)
        self.ofp.write(rledata)
        self.data = ''

    def close(self):
        """Encode whatever is still buffered and close the next stage."""
        if self.data:
            rledata = binascii.rlecode_hqx(self.data)
            self.ofp.write(rledata)
        self.ofp.close()
        del self.ofp


class BinHex:
    """Incremental binhex writer: header, data fork, then resource fork."""

    def __init__(self, name_finfo_dlen_rlen, ofp):
        """Start a binhex stream.

        name_finfo_dlen_rlen -- tuple (name, finfo, dlen, rlen) as returned
                                by getfileinfo(); finfo may be None.
        ofp -- output file name (str) or an object with write() and close().
        """
        name, finfo, dlen, rlen = name_finfo_dlen_rlen
        if isinstance(ofp, str):
            ofname = ofp
            ofp = open(ofname, 'w')
        ofp.write('(This file must be converted with BinHex 4.0)\n\n:')
        hqxer = _Hqxcoderengine(ofp)
        self.ofp = _Rlecoderengine(hqxer)
        self.crc = 0
        if finfo is None:
            finfo = FInfo()
        self.dlen = dlen
        self.rlen = rlen
        self._writeinfo(name, finfo)
        self.state = _DID_HEADER

    def _writeinfo(self, name, finfo):
        """Write the header record followed by its CRC."""
        nl = len(name)
        if nl > 63:
            raise Error('Filename too long')
        d = chr(nl) + name + '\0'
        d2 = finfo.Type + finfo.Creator

        # Force all structs to be packed with big-endian
        d3 = struct.pack('>h', finfo.Flags)
        d4 = struct.pack('>ii', self.dlen, self.rlen)
        info = d + d2 + d3 + d4
        self._write(info)
        self._writecrc()

    def _write(self, data):
        """Write *data* and fold it into the running CRC."""
        self.crc = binascii.crc_hqx(data, self.crc)
        self.ofp.write(data)

    def _writecrc(self):
        """Write the running CRC as 2 big-endian bytes and reset it."""
        # XXXX Should this be here??
        # self.crc = binascii.crc_hqx('\0\0', self.crc)
        if self.crc < 0:
            fmt = '>h'
        else:
            fmt = '>H'
        self.ofp.write(struct.pack(fmt, self.crc))
        self.crc = 0

    def write(self, data):
        """Write data-fork bytes; only valid directly after the header."""
        if self.state != _DID_HEADER:
            raise Error('Writing data at the wrong time')
        self.dlen = self.dlen - len(data)
        self._write(data)

    def close_data(self):
        """Finish the data fork; raise Error if the size did not match."""
        if self.dlen != 0:
            # Report the data-fork remainder (this used to print rlen).
            raise Error('Incorrect data size, diff=%r' % (self.dlen,))
        self._writecrc()
        self.state = _DID_DATA

    def write_rsrc(self, data):
        """Write resource-fork bytes, closing the data fork first if needed."""
        if self.state < _DID_DATA:
            self.close_data()
        if self.state != _DID_DATA:
            raise Error('Writing resource data at the wrong time')
        self.rlen = self.rlen - len(data)
        self._write(data)

    def close(self):
        """Finish the resource fork and close the output chain."""
        if self.state < _DID_DATA:
            self.close_data()
        if self.state != _DID_DATA:
            raise Error('Close at the wrong time')
        if self.rlen != 0:
            raise Error(
                "Incorrect resource-datasize, diff=%r" % (self.rlen,))
        self._writecrc()
        self.ofp.close()
        self.state = None
        del self.ofp


def binhex(inp, out):
    """(infilename, outfilename) - Create binhex-encoded copy of a file"""
    finfo = getfileinfo(inp)
    ofp = BinHex(finfo, out)

    ifp = open(inp, 'rb')
    try:
        # XXXX Do textfile translation on non-mac systems
        while 1:
            d = ifp.read(128000)
            if not d: break
            ofp.write(d)
        ofp.close_data()
    finally:
        ifp.close()

    ifp = openrsrc(inp, 'rb')
    try:
        while 1:
            d = ifp.read(128000)
            if not d: break
            ofp.write_rsrc(d)
        ofp.close()
    finally:
        ifp.close()


class _Hqxdecoderengine:
    """Read data via the decoder in 4-byte chunks"""

    def __init__(self, ifp):
        self.ifp = ifp
        self.eof = 0

    def read(self, totalwtd):
        """Read at least wtd bytes (or until EOF)"""
        decdata = ''
        wtd = totalwtd
        #
        # The loop here is convoluted, since we don't really know how
        # much to decode: there may be newlines in the incoming data.
        while wtd > 0:
            if self.eof: return decdata
            wtd = ((wtd + 2) // 3) * 4
            data = self.ifp.read(wtd)
            #
            # Next problem: there may not be a complete number of
            # bytes in what we pass to a2b. Solve by yet another
            # loop.
            #
            while 1:
                try:
                    decdatacur, self.eof = binascii.a2b_hqx(data)
                    break
                except binascii.Incomplete:
                    pass
                newdata = self.ifp.read(1)
                if not newdata:
                    raise Error('Premature EOF on binhex file')
                data = data + newdata
            decdata = decdata + decdatacur
            wtd = totalwtd - len(decdata)
            if not decdata and not self.eof:
                raise Error('Premature EOF on binhex file')
        return decdata

    def close(self):
        self.ifp.close()


class _Rledecoderengine:
    """Read data via the RLE-coder"""

    def __init__(self, ifp):
        self.ifp = ifp
        self.pre_buffer = ''    # still RLE-encoded
        self.post_buffer = ''   # decoded, not yet handed out
        self.eof = 0

    def read(self, wtd):
        """Return up to *wtd* decoded bytes, refilling the buffer if short."""
        if wtd > len(self.post_buffer):
            self._fill(wtd - len(self.post_buffer))
        rv = self.post_buffer[:wtd]
        self.post_buffer = self.post_buffer[wtd:]
        return rv

    def _fill(self, wtd):
        """Pull encoded bytes from the source and decode what is safe."""
        self.pre_buffer = self.pre_buffer + self.ifp.read(wtd + 4)
        if self.ifp.eof:
            self.post_buffer = self.post_buffer + \
                binascii.rledecode_hqx(self.pre_buffer)
            self.pre_buffer = ''
            return

        #
        # Obfuscated code ahead. We have to take care that we don't
        # end up with an orphaned RUNCHAR later on. So, we keep a couple
        # of bytes in the buffer, depending on what the end of
        # the buffer looks like:
        # '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0)
        # '?\220' - Keep 2 bytes: repeated something-else
        # '\220\0' - Escaped \220: Keep 2 bytes.
        # '?\220?' - Complete repeat sequence: decode all
        # otherwise: keep 1 byte.
        #
        # Slices (not indexes) are compared so that a very short buffer
        # cannot raise IndexError.
        mark = len(self.pre_buffer)
        if self.pre_buffer[-3:] == RUNCHAR + '\0' + RUNCHAR:
            mark = mark - 3
        elif self.pre_buffer[-1:] == RUNCHAR:
            mark = mark - 2
        elif self.pre_buffer[-2:] == RUNCHAR + '\0':
            mark = mark - 2
        elif self.pre_buffer[-2:-1] == RUNCHAR:
            pass # Decode all
        else:
            mark = mark - 1

        self.post_buffer = self.post_buffer + \
            binascii.rledecode_hqx(self.pre_buffer[:mark])
        self.pre_buffer = self.pre_buffer[mark:]

    def close(self):
        self.ifp.close()


def _skip_to_initial_colon(ifp):
    """Advance *ifp* just past the first ':' that begins a line.

    Raises Error when the input ends before such a colon is seen.
    """
    while 1:
        ch = ifp.read(1)
        if not ch:
            raise Error("No binhex data found")
        # Cater for \r\n terminated lines (which show up as \n\r, hence
        # all lines start with \r)
        if ch == '\r':
            continue
        if ch == ':':
            break
        if ch != '\n':
            ifp.readline()


class HexBin:
    """Incremental binhex reader: header, data fork, then resource fork.

    After construction FName holds the stored file name and FInfo the
    type/creator/flags read from the header.
    """

    def __init__(self, ifp):
        """Open *ifp* (file name or readable object) and parse the header."""
        opened_here = isinstance(ifp, str)
        if opened_here:
            ifp = open(ifp)
        #
        # Find initial colon.
        #
        try:
            _skip_to_initial_colon(ifp)
        except Error:
            # Only close what we opened ourselves.
            if opened_here:
                ifp.close()
            raise

        hqxifp = _Hqxdecoderengine(ifp)
        self.ifp = _Rledecoderengine(hqxifp)
        self.crc = 0
        self._readheader()

    def _read(self, length):
        """Read decoded bytes and fold them into the running CRC."""
        data = self.ifp.read(length)
        self.crc = binascii.crc_hqx(data, self.crc)
        return data

    def _checkcrc(self):
        """Compare the next 2 stored bytes with the running CRC; reset it."""
        filecrc = struct.unpack('>h', self.ifp.read(2))[0] & 0xffff
        #self.crc = binascii.crc_hqx('\0\0', self.crc)
        # XXXX Is this needed??
        self.crc = self.crc & 0xffff
        if filecrc != self.crc:
            raise Error('CRC error, computed %x, read %x'
                        % (self.crc, filecrc))
        self.crc = 0

    def _readheader(self):
        """Parse the header record and populate FName, FInfo, dlen, rlen."""
        namelen = self._read(1)
        fname = self._read(ord(namelen))
        rest = self._read(1 + 4 + 4 + 2 + 4 + 4)
        self._checkcrc()

        # rest[0] is skipped (the encoder writes '\0' after the name).
        ftype = rest[1:5]
        creator = rest[5:9]
        flags = struct.unpack('>h', rest[9:11])[0]
        self.dlen = struct.unpack('>l', rest[11:15])[0]
        self.rlen = struct.unpack('>l', rest[15:19])[0]

        self.FName = fname
        self.FInfo = FInfo()
        self.FInfo.Creator = creator
        self.FInfo.Type = ftype
        self.FInfo.Flags = flags

        self.state = _DID_HEADER

    def read(self, *n):
        """Read up to n bytes (default: all remaining) of the data fork.

        Raises Error if called at the wrong time or if the stream ends
        before the requested bytes are available.
        """
        if self.state != _DID_HEADER:
            raise Error('Read data at wrong time')
        if n:
            n = n[0]
            n = min(n, self.dlen)
        else:
            n = self.dlen
        rv = ''
        while len(rv) < n:
            chunk = self._read(n - len(rv))
            if not chunk:
                # The decoder is exhausted; without this check the loop
                # would spin forever on a truncated stream.
                raise Error('Premature EOF on binhex file')
            rv = rv + chunk
        self.dlen = self.dlen - n
        return rv

    def close_data(self):
        """Skip any unread data-fork bytes and verify the data CRC."""
        if self.state != _DID_HEADER:
            raise Error('close_data at wrong time')
        if self.dlen:
            self._read(self.dlen)
        self._checkcrc()
        self.state = _DID_DATA

    def read_rsrc(self, *n):
        """Read up to n bytes (default: all remaining) of the resource fork."""
        if self.state == _DID_HEADER:
            self.close_data()
        if self.state != _DID_DATA:
            raise Error('Read resource data at wrong time')
        if n:
            n = n[0]
            n = min(n, self.rlen)
        else:
            n = self.rlen
        self.rlen = self.rlen - n
        return self._read(n)

    def close(self):
        """Skip unread resource bytes, verify the CRC and close the input."""
        if self.rlen:
            self.read_rsrc(self.rlen)
        self._checkcrc()
        self.state = _DID_RSRC
        self.ifp.close()


def hexbin(inp, out):
    """(infilename, outfilename) - Decode binhexed file"""
    ifp = HexBin(inp)
    if not out:
        out = ifp.FName

    ofp = open(out, 'wb')
    try:
        # XXXX Do translation on non-mac systems
        while 1:
            d = ifp.read(128000)
            if not d: break
            ofp.write(d)
    finally:
        ofp.close()
    ifp.close_data()

    d = ifp.read_rsrc(128000)
    if d:
        ofp = openrsrc(out, 'wb')
        try:
            ofp.write(d)
            while 1:
                d = ifp.read_rsrc(128000)
                if not d: break
                ofp.write(d)
        finally:
            ofp.close()

    ifp.close()


def _test():
    """Round-trip sys.argv[1] through binhex() and hexbin()."""
    fname = sys.argv[1]
    binhex(fname, fname+'.hqx')
    hexbin(fname+'.hqx', fname+'.viahqx')
    #hexbin(fname, fname+'.unpacked')
    # NOTE(review): exits with status 1 even after a successful round
    # trip; kept as-is because callers may rely on it -- confirm intent.
    sys.exit(1)


if __name__ == '__main__':
    _test()