"""Interface to the liblzma compression library.

This module provides a class for reading and writing compressed files,
classes for incremental (de)compression, and convenience functions for
one-shot (de)compression.

These classes and functions support both the XZ and legacy LZMA
container formats, as well as raw compressed data streams.
"""
103ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
# Public names exported by a star-import of this module.
__all__ = [
    # Constants.  None of these are assigned in this file, so they are
    # presumably supplied by the star-import from _lzma.
    "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256",
    "CHECK_ID_MAX", "CHECK_UNKNOWN",
    "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64",
    "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC",
    "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW",
    "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4",
    "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",

    # Classes, exceptions and functions.  LZMAFile, open, compress and
    # decompress are defined in this file; the rest are not.
    "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
    "open", "compress", "decompress", "is_check_supported",
]
233ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
24e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawdaimport builtins
253ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdaimport io
265f59ddddcd7020bdd3a87b50fe3a8aa0c8e3e689Berker Peksagimport os
273ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawdafrom _lzma import *
28a425c3d5a264c556d31bdd88097c79246b533ea3Nadeem Vawdafrom _lzma import _encode_filter_properties, _decode_filter_properties
292dbc6e6bce0a29757acddd8000d55f7c844295a2Antoine Pitrouimport _compression
303ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
313ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
# Internal state codes stored in LZMAFile._mode.
_MODE_CLOSED   = 0
_MODE_READ     = 1
# Value 2 no longer used
_MODE_WRITE    = 3
363ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
373ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
class LZMAFile(_compression.BaseStream):

    """A file object providing transparent LZMA (de)compression.

    An LZMAFile can act as a wrapper for an existing file object, or
    refer directly to a named file on disk.

    Note that LZMAFile provides a *binary* file interface - data read
    is returned as bytes, and data to be written must be given as a
    bytes-like object.
    """

    def __init__(self, filename=None, mode="r", *,
                 format=None, check=-1, preset=None, filters=None):
        """Open an LZMA-compressed file in binary mode.

        filename can be either an actual file name (given as a str,
        bytes, or PathLike object), in which case the named file is
        opened, or it can be an existing file object to read from or
        write to.

        mode can be "r" for reading (default), "w" for (over)writing,
        "x" for creating exclusively, or "a" for appending. These can
        equivalently be given as "rb", "wb", "xb" and "ab" respectively.

        format specifies the container format to use for the file.
        If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
        default is FORMAT_XZ.

        check specifies the integrity check to use. This argument can
        only be used when opening a file for writing. For FORMAT_XZ,
        the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
        support integrity checks - for these formats, check must be
        omitted, or be CHECK_NONE.

        When opening a file for reading, the *preset* argument is not
        meaningful, and should be omitted. The *filters* argument should
        also be omitted, except when format is FORMAT_RAW (in which case
        it is required).

        When opening a file for writing, the settings used by the
        compressor can be specified either as a preset compression
        level (with the *preset* argument), or in detail as a custom
        filter chain (with the *filters* argument). For FORMAT_XZ and
        FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
        level. For FORMAT_RAW, the caller must always specify a filter
        chain; the raw compressor does not support preset compression
        levels.

        preset (if provided) should be an integer in the range 0-9,
        optionally OR-ed with the constant PRESET_EXTREME.

        filters (if provided) should be a sequence of dicts. Each dict
        should have an entry for "id" indicating ID of the filter, plus
        additional entries for options to the filter.

        Raises ValueError if mode is not one of the supported values, or
        if check or preset is given when opening for reading. Raises
        TypeError if filename is neither a path nor an object with a
        read or write attribute.
        """
        # Start out in the closed state, so that close() is a no-op if
        # construction fails before a file has been attached.
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if mode in ("r", "rb"):
            if check != -1:
                raise ValueError("Cannot specify an integrity check "
                                 "when opening a file for reading")
            if preset is not None:
                raise ValueError("Cannot specify a preset compression "
                                 "level when opening a file for reading")
            if format is None:
                format = FORMAT_AUTO
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if format is None:
                format = FORMAT_XZ
            mode_code = _MODE_WRITE
            self._compressor = LZMACompressor(format=format, check=check,
                                              preset=preset, filters=filters)
            # Count of uncompressed bytes written so far; reported by tell().
            self._pos = 0
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        if isinstance(filename, (str, bytes, os.PathLike)):
            # The underlying file is always opened in binary mode.
            if "b" not in mode:
                mode += "b"
            self._fp = builtins.open(filename, mode)
            # We opened the file ourselves, so close() must close it too.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            # Caller-supplied file object: it is used, but not closed, by us.
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp, LZMADecompressor,
                trailing_error=LZMAError, format=format, filters=filters)
            self._buffer = io.BufferedReader(raw)

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return
        try:
            if self._mode == _MODE_READ:
                self._buffer.close()
                self._buffer = None
            elif self._mode == _MODE_WRITE:
                # Emit whatever the compressor is still holding back.
                self._fp.write(self._compressor.flush())
                self._compressor = None
        finally:
            # Release the underlying file and mark ourselves closed even
            # if the flush above (or the file's own close) raised.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        self._check_can_read()
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        return self._buffer.peek(size)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        self._check_can_read()
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b"" if the file is at EOF.
        """
        self._check_can_read()
        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        self._check_can_read()
        return self._buffer.readline(size)

    def write(self, data):
        """Write a bytes-like object to the file.

        data may be a bytes or bytearray object, or any other object
        supporting the buffer protocol.

        Returns the number of uncompressed bytes written, which is
        always the size of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        self._check_can_write()
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # For other buffer-protocol objects, len() counts items rather
            # than bytes (e.g. 4-byte integers), so measure via memoryview.
            data = memoryview(data)
            length = data.nbytes

        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        self._pos += length
        return length

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        self._check_can_seek()
        return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        self._check_not_closed()
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        # In write mode, the position is the uncompressed byte count.
        return self._pos
2613ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
2623ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
def open(filename, mode="rb", *,
         format=None, check=-1, preset=None, filters=None,
         encoding=None, errors=None, newline=None):
    """Open an LZMA-compressed file in binary or text mode.

    filename is either the name of a file to open (a str, bytes, or
    PathLike object) or an already-open file object to read from or
    write to.

    mode is one of "r", "rb" (the default), "w", "wb", "x", "xb", "a",
    or "ab" for binary mode, or "rt", "wt", "xt", or "at" for text
    mode.

    format, check, preset and filters carry the compression settings
    and are passed straight through to the LZMAFile constructor.

    In binary mode the result is simply LZMAFile(filename, mode, ...),
    and the encoding, errors and newline arguments must be left unset
    (a ValueError is raised otherwise).

    In text mode the LZMAFile is wrapped in an io.TextIOWrapper that is
    configured with the given encoding, error handling behavior, and
    line ending(s). Combining "t" and "b" in mode raises ValueError.

    """
    wants_text = "t" in mode

    if wants_text and "b" in mode:
        raise ValueError("Invalid mode: %r" % (mode,))

    if not wants_text:
        # The text-layer options make no sense without a text wrapper.
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # LZMAFile itself only understands binary modes, so drop the "t".
    raw_file = LZMAFile(filename, mode.replace("t", ""),
                        format=format, check=check,
                        preset=preset, filters=filters)

    if not wants_text:
        return raw_file
    return io.TextIOWrapper(raw_file, encoding, errors, newline)
308e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
309e860404eb78c2f6fcb05477bdb691e81009ee28dNadeem Vawda
def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
    """Compress a block of data in one shot.

    The optional arguments *format*, *check*, *preset* and *filters*
    are handed to LZMACompressor unchanged; see its docstring for
    their meaning.

    For incremental compression, use an LZMACompressor instead.
    """
    encoder = LZMACompressor(format, check, preset, filters)
    # The body comes first, then whatever flush() emits afterwards.
    body = encoder.compress(data)
    tail = encoder.flush()
    return b"".join((body, tail))
3203ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
3213ff069ebc6884c46c3f99ea61919f7728708c571Nadeem Vawda
def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
    """Decompress a block of data in one shot.

    The optional arguments *format*, *memlimit* and *filters* are
    handed to LZMADecompressor unchanged; see its docstring for their
    meaning.

    data may hold several compressed streams back to back; their
    decompressed contents are concatenated. Trailing bytes that do not
    form a valid stream are ignored once at least one stream has been
    decoded. LZMAError is raised if the first stream is invalid, or if
    any stream ends before its end-of-stream marker.

    For incremental decompression, use an LZMADecompressor instead.
    """
    pieces = []
    pending = data
    while True:
        # Each stream needs a fresh decompressor object.
        decoder = LZMADecompressor(format, memlimit, filters)
        try:
            piece = decoder.decompress(pending)
        except LZMAError:
            if not pieces:
                raise  # Error on the first iteration; bail out.
            # Leftover data is not a valid LZMA/XZ stream; ignore it.
            return b"".join(pieces)
        if not decoder.eof:
            raise LZMAError("Compressed data ended before the "
                            "end-of-stream marker was reached")
        pieces.append(piece)
        pending = decoder.unused_data
        if not pending:
            return b"".join(pieces)
348