# lzma.py revision 186370b43331536808a46d1f31269ae3ba7bd63b
"""Interface to the liblzma compression library.

This module provides a class for reading and writing compressed files,
classes for incremental (de)compression, and convenience functions for
one-shot (de)compression.

These classes and functions support both the XZ and legacy LZMA
container formats, as well as raw compressed data streams.
"""

__all__ = [
    "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256",
    "CHECK_ID_MAX", "CHECK_UNKNOWN",
    "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64",
    "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC",
    "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW",
    "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4",
    "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",

    "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
    "open", "compress", "decompress", "is_check_supported",
]

import builtins
import io
from _lzma import *
from _lzma import _encode_filter_properties, _decode_filter_properties


# Internal states of an LZMAFile object.
_MODE_CLOSED = 0
_MODE_READ = 1
_MODE_READ_EOF = 2
_MODE_WRITE = 3

# Number of compressed bytes requested from the underlying file per read.
_BUFFER_SIZE = 8192


class LZMAFile(io.BufferedIOBase):

    """A file object providing transparent LZMA (de)compression.

    An LZMAFile can act as a wrapper for an existing file object, or
    refer directly to a named file on disk.

    Note that LZMAFile provides a *binary* file interface - data read
    is returned as bytes, and data to be written must be given as bytes.
    """

    def __init__(self, filename=None, mode="r", *,
                 format=None, check=-1, preset=None, filters=None):
        """Open an LZMA-compressed file in binary mode.

        filename can be either an actual file name (given as a str or
        bytes object), in which case the named file is opened, or it can
        be an existing file object to read from or write to.

        mode can be "r" for reading (default), "w" for (over)writing, or
        "a" for appending. These can equivalently be given as "rb", "wb"
        and "ab" respectively.

        format specifies the container format to use for the file.
        If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
        default is FORMAT_XZ.

        check specifies the integrity check to use. This argument can
        only be used when opening a file for writing. For FORMAT_XZ,
        the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
        support integrity checks - for these formats, check must be
        omitted, or be CHECK_NONE.

        When opening a file for reading, the *preset* argument is not
        meaningful, and should be omitted. The *filters* argument should
        also be omitted, except when format is FORMAT_RAW (in which case
        it is required).

        When opening a file for writing, the settings used by the
        compressor can be specified either as a preset compression
        level (with the *preset* argument), or in detail as a custom
        filter chain (with the *filters* argument). For FORMAT_XZ and
        FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
        level. For FORMAT_RAW, the caller must always specify a filter
        chain; the raw compressor does not support preset compression
        levels.

        preset (if provided) should be an integer in the range 0-9,
        optionally OR-ed with the constant PRESET_EXTREME.

        filters (if provided) should be a sequence of dicts. Each dict
        should have an entry for "id" indicating ID of the filter, plus
        additional entries for options to the filter.

        Raises ValueError for an unsupported mode, or if check/preset is
        given when reading; raises TypeError if filename is neither a
        str/bytes object nor an object with a read() or write() method.
        """
        # Start out in the closed state so that close() is a safe no-op if
        # anything below raises before the file is fully set up.
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED
        self._pos = 0
        self._size = -1

        if mode in ("r", "rb"):
            if check != -1:
                raise ValueError("Cannot specify an integrity check "
                                 "when opening a file for reading")
            if preset is not None:
                raise ValueError("Cannot specify a preset compression "
                                 "level when opening a file for reading")
            if format is None:
                format = FORMAT_AUTO
            mode_code = _MODE_READ
            # Save the args to pass to the LZMADecompressor initializer.
            # If the file contains multiple compressed streams, each
            # stream will need a separate decompressor object.
            self._init_args = {"format": format, "filters": filters}
            self._decompressor = LZMADecompressor(**self._init_args)
            self._buffer = b""
            self._buffer_offset = 0
        elif mode in ("w", "wb", "a", "ab"):
            if format is None:
                format = FORMAT_XZ
            mode_code = _MODE_WRITE
            self._compressor = LZMACompressor(format=format, check=check,
                                              preset=preset, filters=filters)
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        if isinstance(filename, (str, bytes)):
            # The underlying file is always opened in binary mode.
            if "b" not in mode:
                mode += "b"
            self._fp = builtins.open(filename, mode)
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            # Caller-supplied file object: it is used but never closed here.
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str or bytes object, or a file")

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return
        try:
            if self._mode in (_MODE_READ, _MODE_READ_EOF):
                self._decompressor = None
                self._buffer = b""
            elif self._mode == _MODE_WRITE:
                self._fp.write(self._compressor.flush())
                self._compressor = None
        finally:
            # Always release the underlying file and mark this object as
            # closed, even if flushing or closing raised.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._fp.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode in (_MODE_READ, _MODE_READ_EOF)

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    # Mode-checking helper functions.

    def _check_not_closed(self):
        """Raise ValueError if the file has been closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file")

    def _check_can_read(self):
        """Raise unless the file is open for reading."""
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            # A closed file reports ValueError rather than
            # UnsupportedOperation.
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for reading")

    def _check_can_write(self):
        """Raise unless the file is open for writing."""
        if self._mode != _MODE_WRITE:
            self._check_not_closed()
            raise io.UnsupportedOperation("File not open for writing")

    def _check_can_seek(self):
        """Raise unless the file is open for reading and is seekable."""
        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
            self._check_not_closed()
            raise io.UnsupportedOperation("Seeking is only supported "
                                          "on files open for reading")
        if not self._fp.seekable():
            raise io.UnsupportedOperation("The underlying file object "
                                          "does not support seeking")

    # Fill the readahead buffer if it is empty. Returns False on EOF.
    def _fill_buffer(self):
        if self._mode == _MODE_READ_EOF:
            return False
        # Depending on the input data, our call to the decompressor may not
        # return any data. In this case, try again after reading another block.
        while self._buffer_offset == len(self._buffer):
            rawblock = (self._decompressor.unused_data or
                        self._fp.read(_BUFFER_SIZE))

            if not rawblock:
                if self._decompressor.eof:
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
                else:
                    raise EOFError("Compressed file ended before the "
                                   "end-of-stream marker was reached")

            # Continue to next stream.
            if self._decompressor.eof:
                self._decompressor = LZMADecompressor(**self._init_args)

            self._buffer = self._decompressor.decompress(rawblock)
            self._buffer_offset = 0
        return True

    # Read data until EOF.
    # If return_data is false, consume the data without returning it.
    def _read_all(self, return_data=True):
        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while self._fill_buffer():
            if return_data:
                blocks.append(self._buffer)
            self._pos += len(self._buffer)
            self._buffer = b""
        if return_data:
            return b"".join(blocks)

    # Read a block of up to n bytes.
    # If return_data is false, consume the data without returning it.
    def _read_block(self, n, return_data=True):
        # Nothing to consume.  seek() passes a negative n here after
        # rewinding for a target before the start of the stream; without
        # this guard the fast path below would store a negative
        # _buffer_offset and later reads would wrongly return b"".
        if n <= 0:
            return b"" if return_data else None

        # If we have enough data buffered, return immediately.
        end = self._buffer_offset + n
        if end <= len(self._buffer):
            data = self._buffer[self._buffer_offset : end]
            self._buffer_offset = end
            self._pos += len(data)
            return data if return_data else None

        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
        self._buffer = self._buffer[self._buffer_offset:]
        self._buffer_offset = 0

        blocks = []
        while n > 0 and self._fill_buffer():
            if n < len(self._buffer):
                data = self._buffer[:n]
                self._buffer_offset = n
            else:
                data = self._buffer
                self._buffer = b""
            if return_data:
                blocks.append(data)
            self._pos += len(data)
            n -= len(data)
        if return_data:
            return b"".join(blocks)

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        self._check_can_read()
        if not self._fill_buffer():
            return b""
        return self._buffer[self._buffer_offset:]

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        self._check_can_read()
        if size == 0:
            return b""
        elif size < 0:
            return self._read_all()
        else:
            return self._read_block(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream.

        Returns b"" if the file is at EOF.
        """
        # Usually, read1() calls _fp.read() at most once. However, sometimes
        # this does not give enough data for the decompressor to make progress.
        # In this case we make multiple reads, to avoid returning b"".
        self._check_can_read()
        if (size == 0 or
            # Only call _fill_buffer() if the buffer is actually empty.
            # This gives a significant speedup if *size* is small.
            (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
            return b""
        if size > 0:
            data = self._buffer[self._buffer_offset :
                                self._buffer_offset + size]
            self._buffer_offset += len(data)
        else:
            data = self._buffer[self._buffer_offset:]
            self._buffer = b""
            self._buffer_offset = 0
        self._pos += len(data)
        return data

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        self._check_can_read()
        # Shortcut for the common case - the whole line is in the buffer.
        if size < 0:
            end = self._buffer.find(b"\n", self._buffer_offset) + 1
            if end > 0:
                line = self._buffer[self._buffer_offset : end]
                self._buffer_offset = end
                self._pos += len(line)
                return line
        return io.BufferedIOBase.readline(self, size)

    def write(self, data):
        """Write a bytes object to the file.

        Returns the number of uncompressed bytes written, which is
        always len(data). Note that due to buffering, the file on disk
        may not reflect the data written until close() is called.
        """
        self._check_can_write()
        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        self._pos += len(data)
        return len(data)

    # Rewind the file to the beginning of the data stream.
    def _rewind(self):
        self._fp.seek(0, 0)
        self._mode = _MODE_READ
        self._pos = 0
        self._decompressor = LZMADecompressor(**self._init_args)
        self._buffer = b""
        self._buffer_offset = 0

    def seek(self, offset, whence=0):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        self._check_can_seek()

        # Recalculate offset as an absolute file position.
        if whence == 0:
            pass
        elif whence == 1:
            offset = self._pos + offset
        elif whence == 2:
            # Seeking relative to EOF - we need to know the file's size.
            if self._size < 0:
                self._read_all(return_data=False)
            offset = self._size + offset
        else:
            raise ValueError("Invalid value for whence: {}".format(whence))

        # Make it so that offset is the number of bytes to skip forward.
        if offset < self._pos:
            self._rewind()
        else:
            offset -= self._pos

        # Read and discard data until we reach the desired position.
        self._read_block(offset, return_data=False)

        return self._pos

    def tell(self):
        """Return the current file position."""
        self._check_not_closed()
        return self._pos


# NOTE: the module-level open() helper begins at this point in the original
# listing, but its definition runs past the end of this section.  Its visible
# opening lines are preserved verbatim below (as comments, so that this
# section still parses on its own):
#
# def open(filename, mode="rb", *,
#          format=None, check=-1, preset=None, filters=None,
#          encoding=None, errors=None, newline=None):
#     """Open an LZMA-compressed file in binary or text mode.
#
#     filename can be either an actual file name (given as a str or bytes
#     object), in which case the named file is opened, or it can be an
#     existing file object to read from or write to.
#
#     The mode argument can be "r", "rb" (default), "w", "wb", "a" or "ab"
#     for binary mode, or "rt", "wt" or "at" for text mode.
431e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda 432801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda The format, check, preset and filters arguments specify the 433801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda compression settings, as for LZMACompressor, LZMADecompressor and 434801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda LZMAFile. 435e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda 436801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda For binary mode, this function is equivalent to the LZMAFile 437801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda constructor: LZMAFile(filename, mode, ...). In this case, the 438801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda encoding, errors and newline arguments must not be provided. 439e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda 440e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda For text mode, a LZMAFile object is created, and wrapped in an 441801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda io.TextIOWrapper instance with the specified encoding, error 442801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda handling behavior, and line ending(s). 
443e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda 444e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda """ 445e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda if "t" in mode: 446e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda if "b" in mode: 447e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda raise ValueError("Invalid mode: %r" % (mode,)) 448e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda else: 449e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda if encoding is not None: 450e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda raise ValueError("Argument 'encoding' not supported in binary mode") 451e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda if errors is not None: 452e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda raise ValueError("Argument 'errors' not supported in binary mode") 453e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda if newline is not None: 454e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda raise ValueError("Argument 'newline' not supported in binary mode") 455e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda 456e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda lz_mode = mode.replace("t", "") 457e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda binary_file = LZMAFile(filename, lz_mode, format=format, check=check, 458e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda preset=preset, filters=filters) 459e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda 460e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda if "t" in mode: 461e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda return io.TextIOWrapper(binary_file, encoding, errors, newline) 462e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda else: 463e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda return binary_file 464e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda 465e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda 4663ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdadef compress(data, format=FORMAT_XZ, check=-1, 
preset=None, filters=None): 4673ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda """Compress a block of data. 4683ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda 4693ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda Refer to LZMACompressor's docstring for a description of the 4703ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda optional arguments *format*, *check*, *preset* and *filters*. 4713ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda 472801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda For incremental compression, use an LZMACompressor instead. 4733ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda """ 4743ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda comp = LZMACompressor(format, check, preset, filters) 4753ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda return comp.compress(data) + comp.flush() 4763ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda 4773ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda 4783ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdadef decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None): 4793ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda """Decompress a block of data. 4803ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda 4813ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda Refer to LZMADecompressor's docstring for a description of the 4823ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda optional arguments *format*, *check* and *filters*. 4833ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda 484801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda For incremental decompression, use an LZMADecompressor instead. 
4853ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda """ 4863ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda results = [] 4873ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda while True: 4883ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda decomp = LZMADecompressor(format, memlimit, filters) 4893ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda results.append(decomp.decompress(data)) 4903ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda if not decomp.eof: 4913ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda raise LZMAError("Compressed data ended before the " 4923ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda "end-of-stream marker was reached") 4933ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda if not decomp.unused_data: 4943ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda return b"".join(results) 4953ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda # There is unused data left over. Proceed to next stream. 4963ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda data = decomp.unused_data 497