lzma.py revision 186370b43331536808a46d1f31269ae3ba7bd63b
13ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda"""Interface to the liblzma compression library.
23ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
33ff069ebc6884c46c3f99ea61919f7728708c571Nadeem VawdaThis module provides a class for reading and writing compressed files,
43ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdaclasses for incremental (de)compression, and convenience functions for
53ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdaone-shot (de)compression.
63ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
73ff069ebc6884c46c3f99ea61919f7728708c571Nadeem VawdaThese classes and functions support both the XZ and legacy LZMA
83ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdacontainer formats, as well as raw compressed data streams.
93ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda"""
103ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
113ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda__all__ = [
123ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256",
133ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    "CHECK_ID_MAX", "CHECK_UNKNOWN",
143ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64",
153ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC",
163ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW",
173ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4",
183ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",
193ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
203ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
21e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    "open", "compress", "decompress", "is_check_supported",
223ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda]
233ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
24e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawdaimport builtins
253ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdaimport io
263ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdafrom _lzma import *
27a425c3d5a264c556d31bdd88097c79246b533ea3Nadeem Vawdafrom _lzma import _encode_filter_properties, _decode_filter_properties
283ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
293ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
303ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda_MODE_CLOSED   = 0
313ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda_MODE_READ     = 1
323ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda_MODE_READ_EOF = 2
333ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda_MODE_WRITE    = 3
343ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
353ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda_BUFFER_SIZE = 8192
363ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
373ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
383ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdaclass LZMAFile(io.BufferedIOBase):
393ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
403ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    """A file object providing transparent LZMA (de)compression.
413ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
423ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    An LZMAFile can act as a wrapper for an existing file object, or
433ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    refer directly to a named file on disk.
443ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
453ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    Note that LZMAFile provides a *binary* file interface - data read
463ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    is returned as bytes, and data to be written must be given as bytes.
473ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    """
483ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
493ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def __init__(self, filename=None, mode="r", *,
5033c34da5745f2e3fdc315e5098295621d8023674Nadeem Vawda                 format=None, check=-1, preset=None, filters=None):
5133c34da5745f2e3fdc315e5098295621d8023674Nadeem Vawda        """Open an LZMA-compressed file in binary mode.
523ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
5333c34da5745f2e3fdc315e5098295621d8023674Nadeem Vawda        filename can be either an actual file name (given as a str or
5433c34da5745f2e3fdc315e5098295621d8023674Nadeem Vawda        bytes object), in which case the named file is opened, or it can
5533c34da5745f2e3fdc315e5098295621d8023674Nadeem Vawda        be an existing file object to read from or write to.
563ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
573ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        mode can be "r" for reading (default), "w" for (over)writing, or
58801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda        "a" for appending. These can equivalently be given as "rb", "wb"
596cbb20cdf61329ebfa6afcacad21ee6252fb5be5Nadeem Vawda        and "ab" respectively.
603ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
613ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        format specifies the container format to use for the file.
623ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
633ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        default is FORMAT_XZ.
643ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
653ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        check specifies the integrity check to use. This argument can
663ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        only be used when opening a file for writing. For FORMAT_XZ,
673ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
683ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        support integrity checks - for these formats, check must be
693ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        omitted, or be CHECK_NONE.
703ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
713ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        When opening a file for reading, the *preset* argument is not
723ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        meaningful, and should be omitted. The *filters* argument should
733ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        also be omitted, except when format is FORMAT_RAW (in which case
743ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        it is required).
753ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
763ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        When opening a file for writing, the settings used by the
773ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        compressor can be specified either as a preset compression
783ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        level (with the *preset* argument), or in detail as a custom
793ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        filter chain (with the *filters* argument). For FORMAT_XZ and
803ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
813ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        level. For FORMAT_RAW, the caller must always specify a filter
823ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        chain; the raw compressor does not support preset compression
833ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        levels.
843ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
853ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        preset (if provided) should be an integer in the range 0-9,
863ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        optionally OR-ed with the constant PRESET_EXTREME.
873ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
883ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        filters (if provided) should be a sequence of dicts. Each dict
893ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        should have an entry for "id" indicating ID of the filter, plus
903ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        additional entries for options to the filter.
913ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """
923ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._fp = None
933ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._closefp = False
943ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._mode = _MODE_CLOSED
953ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._pos = 0
963ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._size = -1
973ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
986cbb20cdf61329ebfa6afcacad21ee6252fb5be5Nadeem Vawda        if mode in ("r", "rb"):
993ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            if check != -1:
1003ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                raise ValueError("Cannot specify an integrity check "
1013ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                                 "when opening a file for reading")
1023ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            if preset is not None:
1033ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                raise ValueError("Cannot specify a preset compression "
1043ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                                 "level when opening a file for reading")
1053ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            if format is None:
1063ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                format = FORMAT_AUTO
1073ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            mode_code = _MODE_READ
1083ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            # Save the args to pass to the LZMADecompressor initializer.
1093ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            # If the file contains multiple compressed streams, each
1103ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            # stream will need a separate decompressor object.
1113ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            self._init_args = {"format":format, "filters":filters}
1123ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            self._decompressor = LZMADecompressor(**self._init_args)
113186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._buffer = b""
114186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._buffer_offset = 0
1156cbb20cdf61329ebfa6afcacad21ee6252fb5be5Nadeem Vawda        elif mode in ("w", "wb", "a", "ab"):
1163ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            if format is None:
1173ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                format = FORMAT_XZ
1183ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            mode_code = _MODE_WRITE
1193ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            self._compressor = LZMACompressor(format=format, check=check,
1203ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                                              preset=preset, filters=filters)
1213ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        else:
1223ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            raise ValueError("Invalid mode: {!r}".format(mode))
1233ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
12433c34da5745f2e3fdc315e5098295621d8023674Nadeem Vawda        if isinstance(filename, (str, bytes)):
1256cbb20cdf61329ebfa6afcacad21ee6252fb5be5Nadeem Vawda            if "b" not in mode:
1266cbb20cdf61329ebfa6afcacad21ee6252fb5be5Nadeem Vawda                mode += "b"
127e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda            self._fp = builtins.open(filename, mode)
1283ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            self._closefp = True
1293ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            self._mode = mode_code
13033c34da5745f2e3fdc315e5098295621d8023674Nadeem Vawda        elif hasattr(filename, "read") or hasattr(filename, "write"):
13133c34da5745f2e3fdc315e5098295621d8023674Nadeem Vawda            self._fp = filename
1323ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            self._mode = mode_code
1333ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        else:
13433c34da5745f2e3fdc315e5098295621d8023674Nadeem Vawda            raise TypeError("filename must be a str or bytes object, or a file")
1353ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1363ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def close(self):
1373ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Flush and close the file.
1383ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1393ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        May be called more than once without error. Once the file is
1403ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        closed, any other operation on it will raise a ValueError.
1413ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """
1423ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        if self._mode == _MODE_CLOSED:
1433ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            return
1443ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        try:
1453ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            if self._mode in (_MODE_READ, _MODE_READ_EOF):
1463ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                self._decompressor = None
147186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda                self._buffer = b""
1483ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            elif self._mode == _MODE_WRITE:
1493ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                self._fp.write(self._compressor.flush())
1503ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                self._compressor = None
1513ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        finally:
1523ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            try:
1533ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                if self._closefp:
1543ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                    self._fp.close()
1553ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            finally:
1563ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                self._fp = None
1573ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                self._closefp = False
1583ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                self._mode = _MODE_CLOSED
1593ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1603ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    @property
1613ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def closed(self):
1623ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """True if this file is closed."""
1633ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        return self._mode == _MODE_CLOSED
1643ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1653ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def fileno(self):
1663ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Return the file descriptor for the underlying file."""
1673ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._check_not_closed()
1683ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        return self._fp.fileno()
1693ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1703ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def seekable(self):
1713ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Return whether the file supports seeking."""
172ae557d767fa0862188a17914eb07b74088ed4d29Nadeem Vawda        return self.readable() and self._fp.seekable()
1733ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1743ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def readable(self):
1753ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Return whether the file was opened for reading."""
1763ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._check_not_closed()
1773ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        return self._mode in (_MODE_READ, _MODE_READ_EOF)
1783ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1793ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def writable(self):
1803ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Return whether the file was opened for writing."""
1813ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._check_not_closed()
1823ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        return self._mode == _MODE_WRITE
1833ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1843ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    # Mode-checking helper functions.
1853ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1863ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def _check_not_closed(self):
1873ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        if self.closed:
1883ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            raise ValueError("I/O operation on closed file")
1893ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1903ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def _check_can_read(self):
191186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
192186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._check_not_closed()
1933ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            raise io.UnsupportedOperation("File not open for reading")
1943ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
1953ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def _check_can_write(self):
196186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if self._mode != _MODE_WRITE:
197186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._check_not_closed()
1983ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            raise io.UnsupportedOperation("File not open for writing")
1993ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
2003ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def _check_can_seek(self):
201186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if self._mode not in (_MODE_READ, _MODE_READ_EOF):
202186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._check_not_closed()
2033ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            raise io.UnsupportedOperation("Seeking is only supported "
2043ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                                          "on files open for reading")
205ae557d767fa0862188a17914eb07b74088ed4d29Nadeem Vawda        if not self._fp.seekable():
206ae557d767fa0862188a17914eb07b74088ed4d29Nadeem Vawda            raise io.UnsupportedOperation("The underlying file object "
207ae557d767fa0862188a17914eb07b74088ed4d29Nadeem Vawda                                          "does not support seeking")
2083ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
2093ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    # Fill the readahead buffer if it is empty. Returns False on EOF.
2103ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def _fill_buffer(self):
211186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if self._mode == _MODE_READ_EOF:
212186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            return False
21337d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda        # Depending on the input data, our call to the decompressor may not
21437d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda        # return any data. In this case, try again after reading another block.
215186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        while self._buffer_offset == len(self._buffer):
216186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            rawblock = (self._decompressor.unused_data or
217186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda                        self._fp.read(_BUFFER_SIZE))
21837d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda
21937d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda            if not rawblock:
22037d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda                if self._decompressor.eof:
22137d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda                    self._mode = _MODE_READ_EOF
22237d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda                    self._size = self._pos
22337d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda                    return False
22437d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda                else:
22537d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda                    raise EOFError("Compressed file ended before the "
22637d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda                                   "end-of-stream marker was reached")
22737d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda
22837d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda            # Continue to next stream.
22937d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda            if self._decompressor.eof:
23037d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda                self._decompressor = LZMADecompressor(**self._init_args)
2313ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
23237d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda            self._buffer = self._decompressor.decompress(rawblock)
233186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._buffer_offset = 0
234186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        return True
2353ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
2363ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    # Read data until EOF.
2373ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    # If return_data is false, consume the data without returning it.
2383ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def _read_all(self, return_data=True):
239186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
240186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        self._buffer = self._buffer[self._buffer_offset:]
241186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        self._buffer_offset = 0
242186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda
2433ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        blocks = []
2443ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        while self._fill_buffer():
2453ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            if return_data:
2463ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                blocks.append(self._buffer)
2473ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            self._pos += len(self._buffer)
248186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._buffer = b""
2493ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        if return_data:
2503ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            return b"".join(blocks)
2513ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
2523ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    # Read a block of up to n bytes.
2533ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    # If return_data is false, consume the data without returning it.
2543ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def _read_block(self, n, return_data=True):
255186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        # If we have enough data buffered, return immediately.
256186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        end = self._buffer_offset + n
257186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if end <= len(self._buffer):
258186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            data = self._buffer[self._buffer_offset : end]
259186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._buffer_offset = end
260186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._pos += len(data)
261186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            return data if return_data else None
262186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda
263186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
264186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        self._buffer = self._buffer[self._buffer_offset:]
265186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        self._buffer_offset = 0
266186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda
2673ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        blocks = []
2683ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        while n > 0 and self._fill_buffer():
2693ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            if n < len(self._buffer):
2703ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                data = self._buffer[:n]
271186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda                self._buffer_offset = n
2723ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            else:
2733ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                data = self._buffer
274186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda                self._buffer = b""
2753ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            if return_data:
2763ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                blocks.append(data)
2773ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            self._pos += len(data)
2783ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            n -= len(data)
2793ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        if return_data:
2803ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            return b"".join(blocks)
2813ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
2823ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def peek(self, size=-1):
2833ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Return buffered data without advancing the file position.
2843ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
2853ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        Always returns at least one byte of data, unless at EOF.
2863ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        The exact number of bytes returned is unspecified.
2873ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """
2883ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._check_can_read()
289186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if not self._fill_buffer():
2903ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            return b""
291186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        return self._buffer[self._buffer_offset:]
2923ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
2933ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def read(self, size=-1):
2943ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Read up to size uncompressed bytes from the file.
2953ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
2963ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        If size is negative or omitted, read until EOF is reached.
2973ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        Returns b"" if the file is already at EOF.
2983ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """
2993ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._check_can_read()
300186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if size == 0:
3013ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            return b""
3023ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        elif size < 0:
3033ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            return self._read_all()
3043ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        else:
3053ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            return self._read_block(size)
3063ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3073ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def read1(self, size=-1):
30837d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda        """Read up to size uncompressed bytes, while trying to avoid
30937d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda        making multiple reads from the underlying stream.
3103ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3113ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        Returns b"" if the file is at EOF.
3123ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """
31337d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda        # Usually, read1() calls _fp.read() at most once. However, sometimes
31437d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda        # this does not give enough data for the decompressor to make progress.
31537d3ff14871a25429fb93167aeace0589be45426Nadeem Vawda        # In this case we make multiple reads, to avoid returning b"".
3163ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._check_can_read()
317186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if (size == 0 or
318186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            # Only call _fill_buffer() if the buffer is actually empty.
319186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            # This gives a significant speedup if *size* is small.
320186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
3213ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            return b""
322186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if size > 0:
323186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            data = self._buffer[self._buffer_offset :
324186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda                                self._buffer_offset + size]
325186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._buffer_offset += len(data)
3263ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        else:
327186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            data = self._buffer[self._buffer_offset:]
328186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._buffer = b""
329186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            self._buffer_offset = 0
3303ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._pos += len(data)
3313ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        return data
3323ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
333186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda    def readline(self, size=-1):
334186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        """Read a line of uncompressed bytes from the file.
335186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda
336186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        The terminating newline (if present) is retained. If size is
337186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        non-negative, no more than size bytes will be read (in which
338186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        case the line may be incomplete). Returns b'' if already at EOF.
339186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        """
340186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        self._check_can_read()
341186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        # Shortcut for the common case - the whole line is in the buffer.
342186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        if size < 0:
343186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            end = self._buffer.find(b"\n", self._buffer_offset) + 1
344186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda            if end > 0:
345186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda                line = self._buffer[self._buffer_offset : end]
346186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda                self._buffer_offset = end
347186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda                self._pos += len(line)
348186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda                return line
349186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        return io.BufferedIOBase.readline(self, size)
350186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda
3513ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def write(self, data):
3523ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Write a bytes object to the file.
3533ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3543ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        Returns the number of uncompressed bytes written, which is
3553ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        always len(data). Note that due to buffering, the file on disk
3563ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        may not reflect the data written until close() is called.
3573ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """
3583ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._check_can_write()
3593ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        compressed = self._compressor.compress(data)
3603ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._fp.write(compressed)
3613ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._pos += len(data)
3623ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        return len(data)
3633ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3643ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    # Rewind the file to the beginning of the data stream.
3653ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def _rewind(self):
3663ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._fp.seek(0, 0)
3673ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._mode = _MODE_READ
3683ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._pos = 0
3693ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._decompressor = LZMADecompressor(**self._init_args)
370186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        self._buffer = b""
371186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        self._buffer_offset = 0
3723ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3733ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def seek(self, offset, whence=0):
3743ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Change the file position.
3753ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3763ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        The new position is specified by offset, relative to the
3773ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        position indicated by whence. Possible values for whence are:
3783ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3793ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            0: start of stream (default): offset must not be negative
3803ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            1: current stream position
3813ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            2: end of stream; offset must not be positive
3823ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3833ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        Returns the new file position.
3843ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3853ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        Note that seeking is emulated, sp depending on the parameters,
3863ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        this operation may be extremely slow.
3873ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """
3883ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._check_can_seek()
3893ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3903ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        # Recalculate offset as an absolute file position.
3913ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        if whence == 0:
3923ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            pass
3933ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        elif whence == 1:
3943ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            offset = self._pos + offset
3953ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        elif whence == 2:
3963ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            # Seeking relative to EOF - we need to know the file's size.
3973ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            if self._size < 0:
3983ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                self._read_all(return_data=False)
3993ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            offset = self._size + offset
4003ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        else:
4013ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            raise ValueError("Invalid value for whence: {}".format(whence))
4023ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
4033ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        # Make it so that offset is the number of bytes to skip forward.
4043ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        if offset < self._pos:
4053ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            self._rewind()
4063ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        else:
4073ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            offset -= self._pos
4083ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
4093ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        # Read and discard data until we reach the desired position.
410186370b43331536808a46d1f31269ae3ba7bd63bNadeem Vawda        self._read_block(offset, return_data=False)
4113ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
4123ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        return self._pos
4133ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
4143ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    def tell(self):
4153ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        """Return the current file position."""
4163ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        self._check_not_closed()
4173ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        return self._pos
4183ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
4193ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
420e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawdadef open(filename, mode="rb", *,
421e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda         format=None, check=-1, preset=None, filters=None,
422e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda         encoding=None, errors=None, newline=None):
423e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    """Open an LZMA-compressed file in binary or text mode.
424e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
425801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    filename can be either an actual file name (given as a str or bytes
426801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    object), in which case the named file is opened, or it can be an
427801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    existing file object to read from or write to.
428e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
429801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    The mode argument can be "r", "rb" (default), "w", "wb", "a" or "ab"
430801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    for binary mode, or "rt", "wt" or "at" for text mode.
431e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
432801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    The format, check, preset and filters arguments specify the
433801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    compression settings, as for LZMACompressor, LZMADecompressor and
434801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    LZMAFile.
435e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
436801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    For binary mode, this function is equivalent to the LZMAFile
437801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    constructor: LZMAFile(filename, mode, ...). In this case, the
438801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    encoding, errors and newline arguments must not be provided.
439e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
440e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    For text mode, a LZMAFile object is created, and wrapped in an
441801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    io.TextIOWrapper instance with the specified encoding, error
442801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    handling behavior, and line ending(s).
443e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
444e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    """
445e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    if "t" in mode:
446e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda        if "b" in mode:
447e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda            raise ValueError("Invalid mode: %r" % (mode,))
448e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    else:
449e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda        if encoding is not None:
450e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda            raise ValueError("Argument 'encoding' not supported in binary mode")
451e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda        if errors is not None:
452e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda            raise ValueError("Argument 'errors' not supported in binary mode")
453e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda        if newline is not None:
454e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda            raise ValueError("Argument 'newline' not supported in binary mode")
455e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
456e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    lz_mode = mode.replace("t", "")
457e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    binary_file = LZMAFile(filename, lz_mode, format=format, check=check,
458e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda                           preset=preset, filters=filters)
459e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
460e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    if "t" in mode:
461e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda        return io.TextIOWrapper(binary_file, encoding, errors, newline)
462e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda    else:
463e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda        return binary_file
464e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
465e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
4663ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdadef compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
4673ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    """Compress a block of data.
4683ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
4693ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    Refer to LZMACompressor's docstring for a description of the
4703ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    optional arguments *format*, *check*, *preset* and *filters*.
4713ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
472801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    For incremental compression, use an LZMACompressor instead.
4733ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    """
4743ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    comp = LZMACompressor(format, check, preset, filters)
4753ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    return comp.compress(data) + comp.flush()
4763ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
4773ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
4783ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdadef decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
4793ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    """Decompress a block of data.
4803ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
4813ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    Refer to LZMADecompressor's docstring for a description of the
4823ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    optional arguments *format*, *check* and *filters*.
4833ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
484801985e4b7bb0dd0fdef446e5b5d6d59484b7170Nadeem Vawda    For incremental decompression, use an LZMADecompressor instead.
4853ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    """
4863ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    results = []
4873ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda    while True:
4883ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        decomp = LZMADecompressor(format, memlimit, filters)
4893ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        results.append(decomp.decompress(data))
4903ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        if not decomp.eof:
4913ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            raise LZMAError("Compressed data ended before the "
4923ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda                            "end-of-stream marker was reached")
4933ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        if not decomp.unused_data:
4943ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda            return b"".join(results)
4953ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        # There is unused data left over. Proceed to next stream.
4963ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda        data = decomp.unused_data
497