"""Interface to the liblzma compression library.

This module provides a class for reading and writing compressed files,
classes for incremental (de)compression, and convenience functions for
one-shot (de)compression.

These classes and functions support both the XZ and legacy LZMA
container formats, as well as raw compressed data streams.
"""

__all__ = [
    "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256",
    "CHECK_ID_MAX", "CHECK_UNKNOWN",
    "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64",
    "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC",
    "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW",
    "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4",
    "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",

    "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
    "open", "compress", "decompress", "is_check_supported",
]

import builtins
import io
import os
from _lzma import *
from _lzma import _encode_filter_properties, _decode_filter_properties
try:
    import _compression
except ImportError:
    # NOTE(review): newer interpreters appear to ship this private helper
    # module as compression._common._streams - confirm against the target
    # Python version.  If neither location exists, ImportError propagates.
    from compression._common import _streams as _compression


# Internal state codes stored in LZMAFile._mode.
_MODE_CLOSED   = 0
_MODE_READ     = 1
# Value 2 no longer used
_MODE_WRITE    = 3


class LZMAFile(_compression.BaseStream):

    """A file object providing transparent LZMA (de)compression.

    An LZMAFile can act as a wrapper for an existing file object, or
    refer directly to a named file on disk.

    Note that LZMAFile provides a *binary* file interface - data read
    is returned as bytes, and data to be written must be given as bytes.
    """

    def __init__(self, filename=None, mode="r", *,
                 format=None, check=-1, preset=None, filters=None):
        """Open an LZMA-compressed file in binary mode.

        filename can be either an actual file name (given as a str,
        bytes, or PathLike object), in which case the named file is
        opened, or it can be an existing file object to read from or
        write to.

        mode can be "r" for reading (default), "w" for (over)writing,
        "x" for creating exclusively, or "a" for appending. These can
        equivalently be given as "rb", "wb", "xb" and "ab" respectively.

        format specifies the container format to use for the file.
        If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
        default is FORMAT_XZ.

        check specifies the integrity check to use. This argument can
        only be used when opening a file for writing. For FORMAT_XZ,
        the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
        support integrity checks - for these formats, check must be
        omitted, or be CHECK_NONE.

        When opening a file for reading, the *preset* argument is not
        meaningful, and should be omitted. The *filters* argument should
        also be omitted, except when format is FORMAT_RAW (in which case
        it is required).

        When opening a file for writing, the settings used by the
        compressor can be specified either as a preset compression
        level (with the *preset* argument), or in detail as a custom
        filter chain (with the *filters* argument). For FORMAT_XZ and
        FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
        level. For FORMAT_RAW, the caller must always specify a filter
        chain; the raw compressor does not support preset compression
        levels.

        preset (if provided) should be an integer in the range 0-9,
        optionally OR-ed with the constant PRESET_EXTREME.

        filters (if provided) should be a sequence of dicts. Each dict
        should have an entry for "id" indicating ID of the filter, plus
        additional entries for options to the filter.

        Raises ValueError for an unrecognised mode, or when check or
        preset is given together with a read mode, and TypeError when
        filename is neither a path nor an object with a read or write
        attribute.
        """
        # Start out in the closed state so that close() is a harmless no-op
        # if anything below raises before the file is fully set up.
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if mode in ("r", "rb"):
            if check != -1:
                raise ValueError("Cannot specify an integrity check "
                                 "when opening a file for reading")
            if preset is not None:
                raise ValueError("Cannot specify a preset compression "
                                 "level when opening a file for reading")
            if format is None:
                format = FORMAT_AUTO
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if format is None:
                format = FORMAT_XZ
            mode_code = _MODE_WRITE
            self._compressor = LZMACompressor(format=format, check=check,
                                              preset=preset, filters=filters)
            # Count of uncompressed bytes written so far; reported by tell().
            self._pos = 0
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        if isinstance(filename, (str, bytes, os.PathLike)):
            # The underlying file is always opened in binary mode.
            if "b" not in mode:
                mode += "b"
            self._fp = builtins.open(filename, mode)
            # We opened the file ourselves, so close() must close it too.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            # Caller-supplied file object: used as-is and left open on close().
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp, LZMADecompressor,
                trailing_error=LZMAError, format=format, filters=filters)
            self._buffer = io.BufferedReader(raw)

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return
        # The nested try/finally blocks guarantee that the underlying file
        # is closed (when we own it) and that the object ends up in the
        # closed state even if flushing or closing raises.
        try:
            if self._mode == _MODE_READ:
                self._buffer.close()
                self._buffer = None
            elif self._mode == _MODE_WRITE:
                # Emit whatever the compressor is still holding back.
                self._fp.write(self._compressor.flush())
                self._compressor = None
        finally:
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        # Only files opened for reading have a buffer to delegate to.
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        self._check_can_read()
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        return self._buffer.peek(size)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        self._check_can_read()
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b"" if the file is at EOF.
        """
        self._check_can_read()
        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        self._check_can_read()
        return self._buffer.readline(size)

    def write(self, data):
        """Write a bytes-like object to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        self._check_can_write()
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # For other buffer-protocol objects len() counts items, not
            # bytes (e.g. array.array("I")), so take the size in bytes
            # from a memoryview instead.
            data = memoryview(data)
            length = data.nbytes

        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        self._pos += length
        return length

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        self._check_can_seek()
        return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        self._check_not_closed()
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        # Write mode: position is the running count of uncompressed bytes.
        return self._pos


def open(filename, mode="rb", *,
         format=None, check=-1, preset=None, filters=None,
         encoding=None, errors=None, newline=None):
    """Open an LZMA-compressed file in binary or text mode.

    filename can be either an actual file name (given as a str, bytes,
    or PathLike object), in which case the named file is opened, or it
    can be an existing file object to read from or write to.

    The mode argument can be "r", "rb" (default), "w", "wb", "x", "xb",
    "a", or "ab" for binary mode, or "rt", "wt", "xt", or "at" for text
    mode.

    The format, check, preset and filters arguments specify the
    compression settings, as for LZMACompressor, LZMADecompressor and
    LZMAFile.

    For binary mode, this function is equivalent to the LZMAFile
    constructor: LZMAFile(filename, mode, ...). In this case, the
    encoding, errors and newline arguments must not be provided.

    For text mode, an LZMAFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # LZMAFile itself only understands binary modes; strip the text flag.
    lz_mode = mode.replace("t", "")
    binary_file = LZMAFile(filename, lz_mode, format=format, check=check,
                           preset=preset, filters=filters)

    if "t" in mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file


def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
    """Compress a block of data.

    Refer to LZMACompressor's docstring for a description of the
    optional arguments *format*, *check*, *preset* and *filters*.

    For incremental compression, use an LZMACompressor instead.
    """
    comp = LZMACompressor(format, check, preset, filters)
    return comp.compress(data) + comp.flush()


def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
    """Decompress a block of data.

    Refer to LZMADecompressor's docstring for a description of the
    optional arguments *format*, *memlimit* and *filters*.

    For incremental decompression, use an LZMADecompressor instead.
    """
    results = []
    # Each pass handles one stream with a fresh decompressor; any bytes
    # left over after a stream ends are fed to the next pass, so several
    # concatenated streams are decompressed into a single result.
    while True:
        decomp = LZMADecompressor(format, memlimit, filters)
        try:
            res = decomp.decompress(data)
        except LZMAError:
            if results:
                break  # Leftover data is not a valid LZMA/XZ stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise LZMAError("Compressed data ended before the "
                            "end-of-stream marker was reached")
        data = decomp.unused_data
        if not data:
            break
    return b"".join(results)