1"""
2Python implementation of the io module.
3"""
4
5from __future__ import (print_function, unicode_literals)
6
7import os
8import abc
9import codecs
10import warnings
11# Import thread instead of threading to reduce startup cost
12try:
13    from thread import allocate_lock as Lock
14except ImportError:
15    from dummy_thread import allocate_lock as Lock
16
17import io
18from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
19from errno import EINTR
20
21__metaclass__ = type
22
23# open() uses st_blksize whenever we can
24DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
25
26# NOTE: Base classes defined here are registered with the "official" ABCs
27# defined in io.py. We don't use real inheritance though, because we don't
28# want to inherit the C implementations.
29
30
31class BlockingIOError(IOError):
32
33    """Exception raised when I/O would block on a non-blocking I/O stream."""
34
35    def __init__(self, errno, strerror, characters_written=0):
36        super(IOError, self).__init__(errno, strerror)
37        if not isinstance(characters_written, (int, long)):
            raise TypeError("characters_written must be an integer")
39        self.characters_written = characters_written
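
# A hedged usage sketch ("stream" and "data" below are placeholders, not
# names from this module): code writing to a non-blocking buffered stream
# typically retries with the unwritten remainder:
#
#     try:
#         n = stream.write(data)
#     except BlockingIOError as e:
#         n = e.characters_written   # amount actually accepted so far
#         # ... retry later with data[n:]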
40
41
42def open(file, mode="r", buffering=-1,
43         encoding=None, errors=None,
44         newline=None, closefd=True):
45
46    r"""Open file and return a stream.  Raise IOError upon failure.
47
48    file is either a text or byte string giving the name (and the path
49    if the file isn't in the current working directory) of the file to
50    be opened or an integer file descriptor of the file to be
51    wrapped. (If a file descriptor is given, it is closed when the
52    returned I/O object is closed, unless closefd is set to False.)
53
54    mode is an optional string that specifies the mode in which the file
55    is opened. It defaults to 'r' which means open for reading in text
56    mode.  Other common values are 'w' for writing (truncating the file if
57    it already exists), and 'a' for appending (which on some Unix systems,
58    means that all writes append to the end of the file regardless of the
59    current seek position). In text mode, if encoding is not specified the
60    encoding used is platform dependent. (For reading and writing raw
61    bytes use binary mode and leave encoding unspecified.) The available
62    modes are:
63
64    ========= ===============================================================
65    Character Meaning
66    --------- ---------------------------------------------------------------
67    'r'       open for reading (default)
68    'w'       open for writing, truncating the file first
69    'a'       open for writing, appending to the end of the file if it exists
70    'b'       binary mode
71    't'       text mode (default)
72    '+'       open a disk file for updating (reading and writing)
73    'U'       universal newline mode (for backwards compatibility; unneeded
74              for new code)
75    ========= ===============================================================
76
77    The default mode is 'rt' (open for reading text). For binary random
78    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
79    'r+b' opens the file without truncation.
80
81    Python distinguishes between files opened in binary and text modes,
82    even when the underlying operating system doesn't. Files opened in
83    binary mode (appending 'b' to the mode argument) return contents as
84    bytes objects without any decoding. In text mode (the default, or when
85    't' is appended to the mode argument), the contents of the file are
86    returned as strings, the bytes having been first decoded using a
87    platform-dependent encoding or using the specified encoding if given.
88
89    buffering is an optional integer used to set the buffering policy.
90    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
91    line buffering (only usable in text mode), and an integer > 1 to indicate
92    the size of a fixed-size chunk buffer.  When no buffering argument is
93    given, the default buffering policy works as follows:
94
95    * Binary files are buffered in fixed-size chunks; the size of the buffer
96      is chosen using a heuristic trying to determine the underlying device's
97      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
98      On many systems, the buffer will typically be 4096 or 8192 bytes long.
99
100    * "Interactive" text files (files for which isatty() returns True)
101      use line buffering.  Other text files use the policy described above
102      for binary files.
103
104    encoding is the name of the encoding used to decode or encode the
105    file. This should only be used in text mode. The default encoding is
106    platform dependent, but any encoding supported by Python can be
107    passed.  See the codecs module for the list of supported encodings.
108
109    errors is an optional string that specifies how encoding errors are to
110    be handled---this argument should not be used in binary mode. Pass
111    'strict' to raise a ValueError exception if there is an encoding error
112    (the default of None has the same effect), or pass 'ignore' to ignore
113    errors. (Note that ignoring encoding errors can lead to data loss.)
114    See the documentation for codecs.register for a list of the permitted
115    encoding error strings.
116
117    newline controls how universal newlines works (it only applies to text
118    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
119    follows:
120
121    * On input, if newline is None, universal newlines mode is
122      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
123      these are translated into '\n' before being returned to the
124      caller. If it is '', universal newline mode is enabled, but line
125      endings are returned to the caller untranslated. If it has any of
126      the other legal values, input lines are only terminated by the given
127      string, and the line ending is returned to the caller untranslated.
128
129    * On output, if newline is None, any '\n' characters written are
130      translated to the system default line separator, os.linesep. If
131      newline is '', no translation takes place. If newline is any of the
132      other legal values, any '\n' characters written are translated to
133      the given string.
134
135    If closefd is False, the underlying file descriptor will be kept open
136    when the file is closed. This does not work when a file name is given
137    and must be True in that case.
138
139    open() returns a file object whose type depends on the mode, and
140    through which the standard file operations such as reading and writing
141    are performed. When open() is used to open a file in a text mode ('w',
142    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
143    a file in a binary mode, the returned class varies: in read binary
144    mode, it returns a BufferedReader; in write binary and append binary
145    modes, it returns a BufferedWriter, and in read/write mode, it returns
146    a BufferedRandom.
147
148    It is also possible to use a string or bytearray as a file for both
149    reading and writing. For strings StringIO can be used like a file
150    opened in a text mode, and for bytes a BytesIO can be used like a file
151    opened in a binary mode.
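
    A short usage sketch (doctest style; the temporary file below is created
    purely for illustration):

    >>> import os, tempfile
    >>> fd, path = tempfile.mkstemp()
    >>> os.close(fd)
    >>> with open(path, 'wb') as f:                    # binary -> BufferedWriter
    ...     _ = f.write(b'spam\n')
    >>> with open(path, 'r', encoding='ascii') as f:   # text -> TextIOWrapper
    ...     f.read() == u'spam\n'
    True
    >>> os.remove(path)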
152    """
153    if not isinstance(file, (basestring, int, long)):
154        raise TypeError("invalid file: %r" % file)
155    if not isinstance(mode, basestring):
156        raise TypeError("invalid mode: %r" % mode)
157    if not isinstance(buffering, (int, long)):
158        raise TypeError("invalid buffering: %r" % buffering)
159    if encoding is not None and not isinstance(encoding, basestring):
160        raise TypeError("invalid encoding: %r" % encoding)
161    if errors is not None and not isinstance(errors, basestring):
162        raise TypeError("invalid errors: %r" % errors)
163    modes = set(mode)
164    if modes - set("arwb+tU") or len(mode) > len(modes):
165        raise ValueError("invalid mode: %r" % mode)
166    reading = "r" in modes
167    writing = "w" in modes
168    appending = "a" in modes
169    updating = "+" in modes
170    text = "t" in modes
171    binary = "b" in modes
172    if "U" in modes:
173        if writing or appending:
174            raise ValueError("can't use U and writing mode at once")
175        reading = True
176    if text and binary:
177        raise ValueError("can't have text and binary mode at once")
178    if reading + writing + appending > 1:
179        raise ValueError("can't have read/write/append mode at once")
180    if not (reading or writing or appending):
181        raise ValueError("must have exactly one of read/write/append mode")
182    if binary and encoding is not None:
183        raise ValueError("binary mode doesn't take an encoding argument")
184    if binary and errors is not None:
185        raise ValueError("binary mode doesn't take an errors argument")
186    if binary and newline is not None:
187        raise ValueError("binary mode doesn't take a newline argument")
188    raw = FileIO(file,
189                 (reading and "r" or "") +
190                 (writing and "w" or "") +
191                 (appending and "a" or "") +
192                 (updating and "+" or ""),
193                 closefd)
194    line_buffering = False
195    if buffering == 1 or buffering < 0 and raw.isatty():
196        buffering = -1
197        line_buffering = True
198    if buffering < 0:
199        buffering = DEFAULT_BUFFER_SIZE
200        try:
201            bs = os.fstat(raw.fileno()).st_blksize
202        except (os.error, AttributeError):
203            pass
204        else:
205            if bs > 1:
206                buffering = bs
207    if buffering < 0:
208        raise ValueError("invalid buffering size")
209    if buffering == 0:
210        if binary:
211            return raw
212        raise ValueError("can't have unbuffered text I/O")
213    if updating:
214        buffer = BufferedRandom(raw, buffering)
215    elif writing or appending:
216        buffer = BufferedWriter(raw, buffering)
217    elif reading:
218        buffer = BufferedReader(raw, buffering)
219    else:
220        raise ValueError("unknown mode: %r" % mode)
221    if binary:
222        return buffer
223    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
224    text.mode = mode
225    return text
226
227
228class DocDescriptor:
229    """Helper for builtins.open.__doc__
230    """
231    def __get__(self, obj, typ):
232        return (
233            "open(file, mode='r', buffering=-1, encoding=None, "
234                 "errors=None, newline=None, closefd=True)\n\n" +
235            open.__doc__)
236
237class OpenWrapper:
238    """Wrapper for builtins.open
239
240    Trick so that open won't become a bound method when stored
241    as a class variable (as dbm.dumb does).
242
243    See initstdio() in Python/pythonrun.c.
244    """
245    __doc__ = DocDescriptor()
246
247    def __new__(cls, *args, **kwargs):
248        return open(*args, **kwargs)
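
# Illustrative sketch of the problem OpenWrapper solves (the class below is
# hypothetical): in Python 2, a plain function stored as a class attribute
# becomes a method descriptor, while a class such as OpenWrapper remains an
# ordinary callable:
#
#     class _Database(object):
#         _open = OpenWrapper        # self._open(path, 'rb') keeps working
#         # _open = open             # would be re-bound as a method and break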
249
250
251class UnsupportedOperation(ValueError, IOError):
252    pass
253
254
class IOBase:

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    __metaclass__ = abc.ABCMeta
287
288    ### Internal ###
289
290    def _unsupported(self, name):
291        """Internal: raise an exception for unsupported operations."""
292        raise UnsupportedOperation("%s.%s() not supported" %
293                                   (self.__class__.__name__, name))
294
295    ### Positioning ###
296
297    def seek(self, pos, whence=0):
298        """Change stream position.
299
        Change the stream position to byte offset pos.  pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative
307
308        Return the new absolute position.
309        """
310        self._unsupported("seek")
311
312    def tell(self):
313        """Return current stream position."""
314        return self.seek(0, 1)
315
316    def truncate(self, pos=None):
317        """Truncate file to size bytes.
318
319        Size defaults to the current IO position as reported by tell().  Return
320        the new size.
321        """
322        self._unsupported("truncate")
323
324    ### Flush and close ###
325
326    def flush(self):
327        """Flush write buffers, if applicable.
328
329        This is not implemented for read-only and non-blocking streams.
330        """
331        self._checkClosed()
332        # XXX Should this return the number of bytes written???
333
334    __closed = False
335
336    def close(self):
337        """Flush and close the IO object.
338
339        This method has no effect if the file is already closed.
340        """
341        if not self.__closed:
342            self.flush()
343            self.__closed = True
344
345    def __del__(self):
346        """Destructor.  Calls close()."""
347        # The try/except block is in case this is called at program
348        # exit time, when it's possible that globals have already been
349        # deleted, and then the close() call might fail.  Since
350        # there's nothing we can do about such failures and they annoy
351        # the end users, we suppress the traceback.
352        try:
353            self.close()
354        except:
355            pass
356
357    ### Inquiries ###
358
359    def seekable(self):
360        """Return whether object supports random access.
361
362        If False, seek(), tell() and truncate() will raise IOError.
363        This method may need to do a test seek().
364        """
365        return False
366
367    def _checkSeekable(self, msg=None):
368        """Internal: raise an IOError if file is not seekable
369        """
370        if not self.seekable():
371            raise IOError("File or stream is not seekable."
372                          if msg is None else msg)
373
374
375    def readable(self):
376        """Return whether object was opened for reading.
377
378        If False, read() will raise IOError.
379        """
380        return False
381
382    def _checkReadable(self, msg=None):
383        """Internal: raise an IOError if file is not readable
384        """
385        if not self.readable():
386            raise IOError("File or stream is not readable."
387                          if msg is None else msg)
388
389    def writable(self):
390        """Return whether object was opened for writing.
391
392        If False, write() and truncate() will raise IOError.
393        """
394        return False
395
396    def _checkWritable(self, msg=None):
397        """Internal: raise an IOError if file is not writable
398        """
399        if not self.writable():
400            raise IOError("File or stream is not writable."
401                          if msg is None else msg)
402
403    @property
404    def closed(self):
405        """closed: bool.  True iff the file has been closed.
406
407        For backwards compatibility, this is a property, not a predicate.
408        """
409        return self.__closed
410
411    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
413        """
414        if self.closed:
415            raise ValueError("I/O operation on closed file."
416                             if msg is None else msg)
417
418    ### Context manager ###
419
420    def __enter__(self):
421        """Context management protocol.  Returns self."""
422        self._checkClosed()
423        return self
424
425    def __exit__(self, *args):
426        """Context management protocol.  Calls close()"""
427        self.close()
428
429    ### Lower-level APIs ###
430
431    # XXX Should these be present even if unimplemented?
432
433    def fileno(self):
434        """Returns underlying file descriptor if one exists.
435
436        An IOError is raised if the IO object does not use a file descriptor.
437        """
438        self._unsupported("fileno")
439
440    def isatty(self):
441        """Return whether this is an 'interactive' stream.
442
443        Return False if it can't be determined.
444        """
445        self._checkClosed()
446        return False
447
448    ### Readline[s] and writelines ###
449
450    def readline(self, limit=-1):
451        r"""Read and return a line from the stream.
452
453        If limit is specified, at most limit bytes will be read.
454
455        The line terminator is always b'\n' for binary files; for text
456        files, the newlines argument to open can be used to select the line
457        terminator(s) recognized.
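
        A short sketch (doctest style) using this module's BytesIO:

        >>> b = BytesIO(b"one\ntwo\n")
        >>> b.readline()
        'one\n'
        >>> b.readline(2)
        'tw'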
458        """
459        # For backwards compatibility, a (slowish) readline().
460        if hasattr(self, "peek"):
461            def nreadahead():
462                readahead = self.peek(1)
463                if not readahead:
464                    return 1
465                n = (readahead.find(b"\n") + 1) or len(readahead)
466                if limit >= 0:
467                    n = min(n, limit)
468                return n
469        else:
470            def nreadahead():
471                return 1
472        if limit is None:
473            limit = -1
474        elif not isinstance(limit, (int, long)):
475            raise TypeError("limit must be an integer")
476        res = bytearray()
477        while limit < 0 or len(res) < limit:
478            b = self.read(nreadahead())
479            if not b:
480                break
481            res += b
482            if res.endswith(b"\n"):
483                break
484        return bytes(res)
485
486    def __iter__(self):
487        self._checkClosed()
488        return self
489
490    def next(self):
491        line = self.readline()
492        if not line:
493            raise StopIteration
494        return line
495
496    def readlines(self, hint=None):
497        """Return a list of lines from the stream.
498
499        hint can be specified to control the number of lines read: no more
500        lines will be read if the total size (in bytes/characters) of all
501        lines so far exceeds hint.
502        """
503        if hint is not None and not isinstance(hint, (int, long)):
504            raise TypeError("integer or None expected")
505        if hint is None or hint <= 0:
506            return list(self)
507        n = 0
508        lines = []
509        for line in self:
510            lines.append(line)
511            n += len(line)
512            if n >= hint:
513                break
514        return lines
515
516    def writelines(self, lines):
517        self._checkClosed()
518        for line in lines:
519            self.write(line)
520
521io.IOBase.register(IOBase)
522
523
524class RawIOBase(IOBase):
525
526    """Base class for raw binary I/O."""
527
528    # The read() method is implemented by calling readinto(); derived
529    # classes that want to support read() only need to implement
530    # readinto() as a primitive operation.  In general, readinto() can be
531    # more efficient than read().
532
533    # (It would be tempting to also provide an implementation of
534    # readinto() in terms of read(), in case the latter is a more suitable
535    # primitive operation, but that would lead to nasty recursion in case
536    # a subclass doesn't implement either.)
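
    # A minimal illustrative sketch (the class below is hypothetical, not
    # part of this module): implementing only readable() and readinto()
    # is enough to get a working read()/readall() from this base class:
    #
    #     class DemoRaw(RawIOBase):
    #         def __init__(self, data):
    #             self._data = bytearray(data)
    #         def readable(self):
    #             return True
    #         def readinto(self, b):
    #             n = min(len(b), len(self._data))
    #             b[:n] = self._data[:n]
    #             del self._data[:n]
    #             return n
    #
    #     DemoRaw(b"abc").read()    # -> 'abc', via readall() and readinto()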
537
538    def read(self, n=-1):
539        """Read and return up to n bytes.
540
541        Returns an empty bytes object on EOF, or None if the object is
542        set not to block and has no data to read.
543        """
544        if n is None:
545            n = -1
546        if n < 0:
547            return self.readall()
548        b = bytearray(n.__index__())
549        n = self.readinto(b)
550        if n is None:
551            return None
552        del b[n:]
553        return bytes(b)
554
555    def readall(self):
        """Read until EOF, using multiple read() calls."""
557        res = bytearray()
558        while True:
559            data = self.read(DEFAULT_BUFFER_SIZE)
560            if not data:
561                break
562            res += data
563        if res:
564            return bytes(res)
565        else:
566            # b'' or None
567            return data
568
569    def readinto(self, b):
570        """Read up to len(b) bytes into b.
571
572        Returns number of bytes read (0 for EOF), or None if the object
573        is set not to block and has no data to read.
574        """
575        self._unsupported("readinto")
576
577    def write(self, b):
578        """Write the given buffer to the IO stream.
579
580        Returns the number of bytes written, which may be less than len(b).
581        """
582        self._unsupported("write")
583
584io.RawIOBase.register(RawIOBase)
585from _io import FileIO
586RawIOBase.register(FileIO)
587
588
589class BufferedIOBase(IOBase):
590
591    """Base class for buffered IO objects.
592
593    The main difference with RawIOBase is that the read() method
594    supports omitting the size argument, and does not have a default
595    implementation that defers to readinto().
596
597    In addition, read(), readinto() and write() may raise
598    BlockingIOError if the underlying raw stream is in non-blocking
599    mode and not ready; unlike their raw counterparts, they will never
600    return None.
601
602    A typical implementation should not inherit from a RawIOBase
603    implementation, but wrap one.
604    """
605
606    def read(self, n=None):
607        """Read and return up to n bytes.
608
609        If the argument is omitted, None, or negative, reads and
610        returns all data until EOF.
611
612        If the argument is positive, and the underlying raw stream is
613        not 'interactive', multiple raw reads may be issued to satisfy
614        the byte count (unless EOF is reached first).  But for
615        interactive raw streams (XXX and for pipes?), at most one raw
616        read will be issued, and a short result does not imply that
617        EOF is imminent.
618
619        Returns an empty bytes array on EOF.
620
621        Raises BlockingIOError if the underlying raw stream has no
622        data at the moment.
623        """
624        self._unsupported("read")
625
626    def read1(self, n=None):
627        """Read up to n bytes with at most one read() system call."""
628        self._unsupported("read1")
629
630    def readinto(self, b):
631        """Read up to len(b) bytes into b.
632
633        Like read(), this may issue multiple reads to the underlying raw
634        stream, unless the latter is 'interactive'.
635
636        Returns the number of bytes read (0 for EOF).
637
638        Raises BlockingIOError if the underlying raw stream has no
639        data at the moment.
640        """
641        # XXX This ought to work with anything that supports the buffer API
642        data = self.read(len(b))
643        n = len(data)
644        try:
645            b[:n] = data
646        except TypeError as err:
647            import array
648            if not isinstance(b, array.array):
649                raise err
650            b[:n] = array.array(b'b', data)
651        return n
652
653    def write(self, b):
654        """Write the given buffer to the IO stream.
655
        Return the number of bytes written, which is always len(b).
658
659        Raises BlockingIOError if the buffer is full and the
660        underlying raw stream cannot accept more data at the moment.
661        """
662        self._unsupported("write")
663
664    def detach(self):
665        """
666        Separate the underlying raw stream from the buffer and return it.
667
668        After the raw stream has been detached, the buffer is in an unusable
669        state.
670        """
671        self._unsupported("detach")
672
673io.BufferedIOBase.register(BufferedIOBase)
674
675
676class _BufferedIOMixin(BufferedIOBase):
677
678    """A mixin implementation of BufferedIOBase with an underlying raw stream.
679
680    This passes most requests on to the underlying raw stream.  It
681    does *not* provide implementations of read(), readinto() or
682    write().
683    """
684
685    def __init__(self, raw):
686        self._raw = raw
687
688    ### Positioning ###
689
690    def seek(self, pos, whence=0):
691        new_position = self.raw.seek(pos, whence)
692        if new_position < 0:
693            raise IOError("seek() returned an invalid position")
694        return new_position
695
696    def tell(self):
697        pos = self.raw.tell()
698        if pos < 0:
699            raise IOError("tell() returned an invalid position")
700        return pos
701
702    def truncate(self, pos=None):
703        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
704        # and a flush may be necessary to synch both views of the current
705        # file state.
706        self.flush()
707
708        if pos is None:
709            pos = self.tell()
710        # XXX: Should seek() be used, instead of passing the position
711        # XXX  directly to truncate?
712        return self.raw.truncate(pos)
713
714    ### Flush and close ###
715
716    def flush(self):
717        if self.closed:
718            raise ValueError("flush of closed file")
719        self.raw.flush()
720
721    def close(self):
722        if self.raw is not None and not self.closed:
723            self.flush()
724            self.raw.close()
725
726    def detach(self):
727        if self.raw is None:
728            raise ValueError("raw stream already detached")
729        self.flush()
730        raw = self._raw
731        self._raw = None
732        return raw
733
734    ### Inquiries ###
735
736    def seekable(self):
737        return self.raw.seekable()
738
739    def readable(self):
740        return self.raw.readable()
741
742    def writable(self):
743        return self.raw.writable()
744
745    @property
746    def raw(self):
747        return self._raw
748
749    @property
750    def closed(self):
751        return self.raw.closed
752
753    @property
754    def name(self):
755        return self.raw.name
756
757    @property
758    def mode(self):
759        return self.raw.mode
760
761    def __repr__(self):
762        clsname = self.__class__.__name__
763        try:
764            name = self.name
765        except AttributeError:
766            return "<_pyio.{0}>".format(clsname)
767        else:
768            return "<_pyio.{0} name={1!r}>".format(clsname, name)
769
770    ### Lower-level APIs ###
771
772    def fileno(self):
773        return self.raw.fileno()
774
775    def isatty(self):
776        return self.raw.isatty()
777
778
779class BytesIO(BufferedIOBase):
780
781    """Buffered I/O implementation using an in-memory bytes buffer."""
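
    # A short usage sketch (illustrative):
    #
    #     b = BytesIO(b"abc")
    #     b.read(2)          # -> 'ab'
    #     b.seek(0)          # -> 0
    #     b.write(b"xy")     # -> 2; overwrites in place
    #     b.getvalue()       # -> 'xyc'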
782
783    def __init__(self, initial_bytes=None):
784        buf = bytearray()
785        if initial_bytes is not None:
786            buf.extend(initial_bytes)
787        self._buffer = buf
788        self._pos = 0
789
790    def __getstate__(self):
791        if self.closed:
792            raise ValueError("__getstate__ on closed file")
793        return self.__dict__.copy()
794
795    def getvalue(self):
796        """Return the bytes value (contents) of the buffer
797        """
798        if self.closed:
799            raise ValueError("getvalue on closed file")
800        return bytes(self._buffer)
801
802    def read(self, n=None):
803        if self.closed:
804            raise ValueError("read from closed file")
805        if n is None:
806            n = -1
807        if not isinstance(n, (int, long)):
808            raise TypeError("integer argument expected, got {0!r}".format(
809                type(n)))
810        if n < 0:
811            n = len(self._buffer)
812        if len(self._buffer) <= self._pos:
813            return b""
814        newpos = min(len(self._buffer), self._pos + n)
815        b = self._buffer[self._pos : newpos]
816        self._pos = newpos
817        return bytes(b)
818
819    def read1(self, n):
820        """This is the same as read.
821        """
822        return self.read(n)
823
824    def write(self, b):
825        if self.closed:
826            raise ValueError("write to closed file")
827        if isinstance(b, unicode):
828            raise TypeError("can't write unicode to binary stream")
829        n = len(b)
830        if n == 0:
831            return 0
832        pos = self._pos
833        if pos > len(self._buffer):
834            # Inserts null bytes between the current end of the file
835            # and the new write position.
836            padding = b'\x00' * (pos - len(self._buffer))
837            self._buffer += padding
838        self._buffer[pos:pos + n] = b
839        self._pos += n
840        return n
841
842    def seek(self, pos, whence=0):
843        if self.closed:
844            raise ValueError("seek on closed file")
845        try:
846            pos.__index__
847        except AttributeError:
848            raise TypeError("an integer is required")
849        if whence == 0:
850            if pos < 0:
851                raise ValueError("negative seek position %r" % (pos,))
852            self._pos = pos
853        elif whence == 1:
854            self._pos = max(0, self._pos + pos)
855        elif whence == 2:
856            self._pos = max(0, len(self._buffer) + pos)
857        else:
858            raise ValueError("invalid whence value")
859        return self._pos
860
861    def tell(self):
862        if self.closed:
863            raise ValueError("tell on closed file")
864        return self._pos
865
866    def truncate(self, pos=None):
867        if self.closed:
868            raise ValueError("truncate on closed file")
869        if pos is None:
870            pos = self._pos
871        else:
872            try:
873                pos.__index__
874            except AttributeError:
875                raise TypeError("an integer is required")
876            if pos < 0:
877                raise ValueError("negative truncate position %r" % (pos,))
878        del self._buffer[pos:]
879        return pos
880
881    def readable(self):
882        return True
883
884    def writable(self):
885        return True
886
887    def seekable(self):
888        return True
889
890
891class BufferedReader(_BufferedIOMixin):
892
893    """BufferedReader(raw[, buffer_size])
894
    A buffer for a readable, sequential RawIOBase object.
896
897    The constructor creates a BufferedReader for the given readable raw
898    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
899    is used.
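
    A short sketch (doctest style; BytesIO stands in for a real raw stream
    such as FileIO, purely for illustration):

    >>> r = BufferedReader(BytesIO(b"abcdef"), buffer_size=4)
    >>> r.read(2)
    'ab'
    >>> r.peek(1)
    'cd'
    >>> r.read()
    'cdef'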
900    """
901
902    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
903        """Create a new buffered reader using the given readable raw IO object.
904        """
905        if not raw.readable():
906            raise IOError('"raw" argument must be readable.')
907
908        _BufferedIOMixin.__init__(self, raw)
909        if buffer_size <= 0:
910            raise ValueError("invalid buffer size")
911        self.buffer_size = buffer_size
912        self._reset_read_buf()
913        self._read_lock = Lock()
914
915    def _reset_read_buf(self):
916        self._read_buf = b""
917        self._read_pos = 0
918
919    def read(self, n=None):
920        """Read n bytes.
921
        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF, or the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
926        """
927        if n is not None and n < -1:
928            raise ValueError("invalid number of bytes to read")
929        with self._read_lock:
930            return self._read_unlocked(n)
931
932    def _read_unlocked(self, n=None):
933        nodata_val = b""
934        empty_values = (b"", None)
935        buf = self._read_buf
936        pos = self._read_pos
937
938        # Special case for when the number of bytes to read is unspecified.
939        if n is None or n == -1:
940            self._reset_read_buf()
941            chunks = [buf[pos:]]  # Strip the consumed bytes.
942            current_size = 0
943            while True:
944                # Read until EOF or until read() would block.
945                try:
946                    chunk = self.raw.read()
947                except IOError as e:
948                    if e.errno != EINTR:
949                        raise
950                    continue
951                if chunk in empty_values:
952                    nodata_val = chunk
953                    break
954                current_size += len(chunk)
955                chunks.append(chunk)
956            return b"".join(chunks) or nodata_val
957
958        # The number of bytes to read is specified, return at most n bytes.
959        avail = len(buf) - pos  # Length of the available buffered data.
960        if n <= avail:
961            # Fast path: the data to read is fully buffered.
962            self._read_pos += n
963            return buf[pos:pos+n]
964        # Slow path: read from the stream until enough bytes are read,
965        # or until an EOF occurs or until read() would block.
966        chunks = [buf[pos:]]
967        wanted = max(self.buffer_size, n)
968        while avail < n:
969            try:
970                chunk = self.raw.read(wanted)
971            except IOError as e:
972                if e.errno != EINTR:
973                    raise
974                continue
975            if chunk in empty_values:
976                nodata_val = chunk
977                break
978            avail += len(chunk)
979            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
981        # read() would have blocked.
982        n = min(n, avail)
983        out = b"".join(chunks)
984        self._read_buf = out[n:]  # Save the extra data in the buffer.
985        self._read_pos = 0
986        return out[:n] if out else nodata_val
987
988    def peek(self, n=0):
989        """Returns buffered bytes without advancing the position.
990
991        The argument indicates a desired minimal number of bytes; we
992        do at most one raw read to satisfy it.  We never return more
993        than self.buffer_size.
994        """
995        with self._read_lock:
996            return self._peek_unlocked(n)
997
998    def _peek_unlocked(self, n=0):
999        want = min(n, self.buffer_size)
1000        have = len(self._read_buf) - self._read_pos
1001        if have < want or have <= 0:
1002            to_read = self.buffer_size - have
1003            while True:
1004                try:
1005                    current = self.raw.read(to_read)
1006                except IOError as e:
1007                    if e.errno != EINTR:
1008                        raise
1009                    continue
1010                break
1011            if current:
1012                self._read_buf = self._read_buf[self._read_pos:] + current
1013                self._read_pos = 0
1014        return self._read_buf[self._read_pos:]
1015
1016    def read1(self, n):
1017        """Reads up to n bytes, with at most one read() system call."""
1018        # Returns up to n bytes.  If at least one byte is buffered, we
1019        # only return buffered bytes.  Otherwise, we do one raw read.
1020        if n < 0:
            raise ValueError("number of bytes to read must be non-negative")
1022        if n == 0:
1023            return b""
1024        with self._read_lock:
1025            self._peek_unlocked(1)
1026            return self._read_unlocked(
1027                min(n, len(self._read_buf) - self._read_pos))
1028
1029    def tell(self):
1030        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1031
1032    def seek(self, pos, whence=0):
1033        if not (0 <= whence <= 2):
1034            raise ValueError("invalid whence value")
1035        with self._read_lock:
1036            if whence == 1:
1037                pos -= len(self._read_buf) - self._read_pos
1038            pos = _BufferedIOMixin.seek(self, pos, whence)
1039            self._reset_read_buf()
1040            return pos
1041
1042class BufferedWriter(_BufferedIOMixin):
1043
    """A buffer for a writeable, sequential RawIOBase object.
1045
1046    The constructor creates a BufferedWriter for the given writeable raw
1047    stream. If the buffer_size is not given, it defaults to
1048    DEFAULT_BUFFER_SIZE.
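
    A short sketch (doctest style; an in-memory BytesIO stands in for the
    writable raw stream):

    >>> raw = BytesIO()
    >>> w = BufferedWriter(raw, buffer_size=4)
    >>> w.write(b"ab")
    2
    >>> raw.getvalue()          # nothing written through yet
    ''
    >>> w.flush()
    >>> raw.getvalue()
    'ab'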
1049    """
1050
1051    _warning_stack_offset = 2
1052
1053    def __init__(self, raw,
1054                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1055        if not raw.writable():
1056            raise IOError('"raw" argument must be writable.')
1057
1058        _BufferedIOMixin.__init__(self, raw)
1059        if buffer_size <= 0:
1060            raise ValueError("invalid buffer size")
1061        if max_buffer_size is not None:
1062            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1063                          self._warning_stack_offset)
1064        self.buffer_size = buffer_size
1065        self._write_buf = bytearray()
1066        self._write_lock = Lock()
1067
1068    def write(self, b):
1069        if self.closed:
1070            raise ValueError("write to closed file")
1071        if isinstance(b, unicode):
1072            raise TypeError("can't write unicode to binary stream")
1073        with self._write_lock:
1074            # XXX we can implement some more tricks to try and avoid
1075            # partial writes
1076            if len(self._write_buf) > self.buffer_size:
1077                # We're full, so let's pre-flush the buffer
1078                try:
1079                    self._flush_unlocked()
1080                except BlockingIOError as e:
1081                    # We can't accept anything else.
1082                    # XXX Why not just let the exception pass through?
1083                    raise BlockingIOError(e.errno, e.strerror, 0)
1084            before = len(self._write_buf)
1085            self._write_buf.extend(b)
1086            written = len(self._write_buf) - before
1087            if len(self._write_buf) > self.buffer_size:
1088                try:
1089                    self._flush_unlocked()
1090                except BlockingIOError as e:
1091                    if len(self._write_buf) > self.buffer_size:
1092                        # We've hit the buffer_size. We have to accept a partial
1093                        # write and cut back our buffer.
1094                        overage = len(self._write_buf) - self.buffer_size
1095                        written -= overage
1096                        self._write_buf = self._write_buf[:self.buffer_size]
1097                        raise BlockingIOError(e.errno, e.strerror, written)
1098            return written
1099
1100    def truncate(self, pos=None):
1101        with self._write_lock:
1102            self._flush_unlocked()
1103            if pos is None:
1104                pos = self.raw.tell()
1105            return self.raw.truncate(pos)
1106
1107    def flush(self):
1108        with self._write_lock:
1109            self._flush_unlocked()
1110
1111    def _flush_unlocked(self):
1112        if self.closed:
1113            raise ValueError("flush of closed file")
1114        written = 0
1115        try:
1116            while self._write_buf:
1117                try:
1118                    n = self.raw.write(self._write_buf)
1119                except IOError as e:
1120                    if e.errno != EINTR:
1121                        raise
1122                    continue
1123                if n > len(self._write_buf) or n < 0:
1124                    raise IOError("write() returned incorrect number of bytes")
1125                del self._write_buf[:n]
1126                written += n
1127        except BlockingIOError as e:
1128            n = e.characters_written
1129            del self._write_buf[:n]
1130            written += n
1131            raise BlockingIOError(e.errno, e.strerror, written)
1132
1133    def tell(self):
1134        return _BufferedIOMixin.tell(self) + len(self._write_buf)
1135
1136    def seek(self, pos, whence=0):
1137        if not (0 <= whence <= 2):
1138            raise ValueError("invalid whence")
1139        with self._write_lock:
1140            self._flush_unlocked()
1141            return _BufferedIOMixin.seek(self, pos, whence)
1142
1143
1144class BufferedRWPair(BufferedIOBase):
1145
1146    """A buffered reader and writer object together.
1147
1148    A buffered reader object and buffered writer object put together to
1149    form a sequential IO object that can read and write. This is typically
1150    used with a socket or two-way pipe.
1151
1152    reader and writer are RawIOBase objects that are readable and
1153    writeable respectively. If the buffer_size is omitted it defaults to
1154    DEFAULT_BUFFER_SIZE.
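
    A short sketch (doctest style; two in-memory streams stand in for the
    two ends of a pipe or socket):

    >>> pair = BufferedRWPair(BytesIO(b"ping"), BytesIO())
    >>> pair.read(4)
    'ping'
    >>> pair.write(b"pong")
    4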
1155    """
1156
1157    # XXX The usefulness of this (compared to having two separate IO
1158    # objects) is questionable.
1159
1160    def __init__(self, reader, writer,
1161                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1162        """Constructor.
1163
        The arguments are two RawIOBase instances.
1165        """
1166        if max_buffer_size is not None:
1167            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
1168
1169        if not reader.readable():
1170            raise IOError('"reader" argument must be readable.')
1171
1172        if not writer.writable():
1173            raise IOError('"writer" argument must be writable.')
1174
1175        self.reader = BufferedReader(reader, buffer_size)
1176        self.writer = BufferedWriter(writer, buffer_size)
1177
1178    def read(self, n=None):
1179        if n is None:
1180            n = -1
1181        return self.reader.read(n)
1182
1183    def readinto(self, b):
1184        return self.reader.readinto(b)
1185
1186    def write(self, b):
1187        return self.writer.write(b)
1188
1189    def peek(self, n=0):
1190        return self.reader.peek(n)
1191
1192    def read1(self, n):
1193        return self.reader.read1(n)
1194
1195    def readable(self):
1196        return self.reader.readable()
1197
1198    def writable(self):
1199        return self.writer.writable()
1200
1201    def flush(self):
1202        return self.writer.flush()
1203
1204    def close(self):
1205        self.writer.close()
1206        self.reader.close()
1207
1208    def isatty(self):
1209        return self.reader.isatty() or self.writer.isatty()
1210
1211    @property
1212    def closed(self):
1213        return self.writer.closed
1214
1215
1216class BufferedRandom(BufferedWriter, BufferedReader):
1217
1218    """A buffered interface to random access streams.
1219
1220    The constructor creates a reader and writer for a seekable stream,
1221    raw, given in the first argument. If the buffer_size is omitted it
1222    defaults to DEFAULT_BUFFER_SIZE.
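
    A short sketch (doctest style; an in-memory BytesIO serves as the
    seekable raw stream):

    >>> f = BufferedRandom(BytesIO(b"abcdef"))
    >>> f.read(3)
    'abc'
    >>> _ = f.seek(0)
    >>> f.write(b"xyz")
    3
    >>> _ = f.seek(0)
    >>> f.read()
    'xyzdef'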
1223    """
1224
1225    _warning_stack_offset = 3
1226
1227    def __init__(self, raw,
1228                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1229        raw._checkSeekable()
1230        BufferedReader.__init__(self, raw, buffer_size)
1231        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1232
1233    def seek(self, pos, whence=0):
1234        if not (0 <= whence <= 2):
1235            raise ValueError("invalid whence")
1236        self.flush()
1237        if self._read_buf:
1238            # Undo read ahead.
1239            with self._read_lock:
1240                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1241        # First do the raw seek, then empty the read buffer, so that
1242        # if the raw seek fails, we don't lose buffered data forever.
1243        pos = self.raw.seek(pos, whence)
1244        with self._read_lock:
1245            self._reset_read_buf()
1246        if pos < 0:
1247            raise IOError("seek() returned invalid position")
1248        return pos
1249
1250    def tell(self):
1251        if self._write_buf:
1252            return BufferedWriter.tell(self)
1253        else:
1254            return BufferedReader.tell(self)
1255
1256    def truncate(self, pos=None):
1257        if pos is None:
1258            pos = self.tell()
1259        # Use seek to flush the read buffer.
1260        return BufferedWriter.truncate(self, pos)
1261
1262    def read(self, n=None):
1263        if n is None:
1264            n = -1
1265        self.flush()
1266        return BufferedReader.read(self, n)
1267
1268    def readinto(self, b):
1269        self.flush()
1270        return BufferedReader.readinto(self, b)
1271
1272    def peek(self, n=0):
1273        self.flush()
1274        return BufferedReader.peek(self, n)
1275
1276    def read1(self, n):
1277        self.flush()
1278        return BufferedReader.read1(self, n)
1279
1280    def write(self, b):
1281        if self._read_buf:
1282            # Undo readahead
1283            with self._read_lock:
1284                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1285                self._reset_read_buf()
1286        return BufferedWriter.write(self, b)
1287
1288
1289class TextIOBase(IOBase):
1290
1291    """Base class for text I/O.
1292
1293    This class provides a character and line based interface to stream
1294    I/O. There is no readinto method because Python's character strings
1295    are immutable. There is no public constructor.
1296    """
1297
1298    def read(self, n=-1):
1299        """Read at most n characters from stream.
1300
1301        Read from underlying buffer until we have n characters or we hit EOF.
1302        If n is negative or omitted, read until EOF.
1303        """
1304        self._unsupported("read")
1305
1306    def write(self, s):
1307        """Write string s to stream."""
1308        self._unsupported("write")
1309
1310    def truncate(self, pos=None):
1311        """Truncate size to pos."""
1312        self._unsupported("truncate")
1313
1314    def readline(self):
1315        """Read until newline or EOF.
1316
1317        Returns an empty string if EOF is hit immediately.
1318        """
1319        self._unsupported("readline")
1320
1321    def detach(self):
1322        """
1323        Separate the underlying buffer from the TextIOBase and return it.
1324
1325        After the underlying buffer has been detached, the TextIO is in an
1326        unusable state.
1327        """
1328        self._unsupported("detach")
1329
1330    @property
1331    def encoding(self):
1332        """Subclasses should override."""
1333        return None
1334
1335    @property
1336    def newlines(self):
1337        """Line endings translated so far.
1338
1339        Only line endings translated during reading are considered.
1340
1341        Subclasses should override.
1342        """
1343        return None
1344
1345    @property
1346    def errors(self):
1347        """Error setting of the decoder or encoder.
1348
1349        Subclasses should override."""
1350        return None
1351
1352io.TextIOBase.register(TextIOBase)
1353
1354
1355class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1356    r"""Codec used when reading a file in universal newlines mode.  It wraps
1357    another incremental decoder, translating \r\n and \r into \n.  It also
1358    records the types of newlines encountered.  When used with
1359    translate=False, it ensures that the newline sequence is returned in
1360    one piece.
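
    A short sketch (doctest style, with no wrapped decoder so the input is
    already text):

    >>> d = IncrementalNewlineDecoder(None, translate=True)
    >>> d.decode(u"a\r\nb\rc\n", final=True)
    u'a\nb\nc\n'
    >>> d.newlines
    ('\r', '\n', '\r\n')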
1361    """
1362    def __init__(self, decoder, translate, errors='strict'):
1363        codecs.IncrementalDecoder.__init__(self, errors=errors)
1364        self.translate = translate
1365        self.decoder = decoder
1366        self.seennl = 0
1367        self.pendingcr = False
1368
1369    def decode(self, input, final=False):
        # decode input (with the possible \r from a previous pass)
1371        if self.decoder is None:
1372            output = input
1373        else:
1374            output = self.decoder.decode(input, final=final)
1375        if self.pendingcr and (output or final):
1376            output = "\r" + output
1377            self.pendingcr = False
1378
1379        # retain last \r even when not translating data:
1380        # then readline() is sure to get \r\n in one pass
1381        if output.endswith("\r") and not final:
1382            output = output[:-1]
1383            self.pendingcr = True
1384
1385        # Record which newlines are read
1386        crlf = output.count('\r\n')
1387        cr = output.count('\r') - crlf
1388        lf = output.count('\n') - crlf
1389        self.seennl |= (lf and self._LF) | (cr and self._CR) \
1390                    | (crlf and self._CRLF)
1391
1392        if self.translate:
1393            if crlf:
1394                output = output.replace("\r\n", "\n")
1395            if cr:
1396                output = output.replace("\r", "\n")
1397
1398        return output
1399
1400    def getstate(self):
1401        if self.decoder is None:
1402            buf = b""
1403            flag = 0
1404        else:
1405            buf, flag = self.decoder.getstate()
1406        flag <<= 1
1407        if self.pendingcr:
1408            flag |= 1
1409        return buf, flag
1410
1411    def setstate(self, state):
1412        buf, flag = state
1413        self.pendingcr = bool(flag & 1)
1414        if self.decoder is not None:
1415            self.decoder.setstate((buf, flag >> 1))
1416
1417    def reset(self):
1418        self.seennl = 0
1419        self.pendingcr = False
1420        if self.decoder is not None:
1421            self.decoder.reset()
1422
1423    _LF = 1
1424    _CR = 2
1425    _CRLF = 4
1426
1427    @property
1428    def newlines(self):
1429        return (None,
1430                "\n",
1431                "\r",
1432                ("\r", "\n"),
1433                "\r\n",
1434                ("\n", "\r\n"),
1435                ("\r", "\r\n"),
1436                ("\r", "\n", "\r\n")
1437               )[self.seennl]
1438
1439
1440class TextIOWrapper(TextIOBase):
1441
1442    r"""Character and line based layer over a BufferedIOBase object, buffer.
1443
1444    encoding gives the name of the encoding that the stream will be
1445    decoded or encoded with. It defaults to locale.getpreferredencoding.
1446
    errors determines the strictness of encoding and decoding (see
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.
1459
1460    If line_buffering is True, a call to flush is implied when a call to
1461    write contains a newline character.
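
    A short sketch (doctest style; an in-memory BytesIO serves as the
    underlying binary buffer):

    >>> t = TextIOWrapper(BytesIO(), encoding="utf-8", newline="\n")
    >>> t.write(u"caf\xe9")
    4
    >>> _ = t.seek(0)
    >>> t.read() == u"caf\xe9"
    True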
1462    """
1463
1464    _CHUNK_SIZE = 2048
1465
1466    def __init__(self, buffer, encoding=None, errors=None, newline=None,
1467                 line_buffering=False):
1468        if newline is not None and not isinstance(newline, basestring):
1469            raise TypeError("illegal newline type: %r" % (type(newline),))
1470        if newline not in (None, "", "\n", "\r", "\r\n"):
1471            raise ValueError("illegal newline value: %r" % (newline,))
1472        if encoding is None:
1473            try:
1474                import locale
1475            except ImportError:
1476                # Importing locale may fail if Python is being built
1477                encoding = "ascii"
1478            else:
1479                encoding = locale.getpreferredencoding()
1480
1481        if not isinstance(encoding, basestring):
1482            raise ValueError("invalid encoding: %r" % encoding)
1483
1484        if errors is None:
1485            errors = "strict"
1486        else:
1487            if not isinstance(errors, basestring):
1488                raise ValueError("invalid errors: %r" % errors)
1489
1490        self._buffer = buffer
1491        self._line_buffering = line_buffering
1492        self._encoding = encoding
1493        self._errors = errors
1494        self._readuniversal = not newline
1495        self._readtranslate = newline is None
1496        self._readnl = newline
1497        self._writetranslate = newline != ''
1498        self._writenl = newline or os.linesep
1499        self._encoder = None
1500        self._decoder = None
1501        self._decoded_chars = ''  # buffer for text returned from decoder
1502        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1503        self._snapshot = None  # info for reconstructing decoder state
1504        self._seekable = self._telling = self.buffer.seekable()
1505
1506        if self._seekable and self.writable():
1507            position = self.buffer.tell()
1508            if position != 0:
1509                try:
1510                    self._get_encoder().setstate(0)
1511                except LookupError:
1512                    # Sometimes the encoder doesn't exist
1513                    pass
1514
1515    # self._snapshot is either None, or a tuple (dec_flags, next_input)
1516    # where dec_flags is the second (integer) item of the decoder state
1517    # and next_input is the chunk of input bytes that comes next after the
1518    # snapshot point.  We use this to reconstruct decoder states in tell().
1519
1520    # Naming convention:
1521    #   - "bytes_..." for integer variables that count input bytes
1522    #   - "chars_..." for integer variables that count decoded characters
1523
1524    def __repr__(self):
1525        try:
1526            name = self.name
1527        except AttributeError:
1528            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1529        else:
1530            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1531                name, self.encoding)
1532
1533    @property
1534    def encoding(self):
1535        return self._encoding
1536
1537    @property
1538    def errors(self):
1539        return self._errors
1540
1541    @property
1542    def line_buffering(self):
1543        return self._line_buffering
1544
1545    @property
1546    def buffer(self):
1547        return self._buffer
1548
1549    def seekable(self):
1550        return self._seekable
1551
1552    def readable(self):
1553        return self.buffer.readable()
1554
1555    def writable(self):
1556        return self.buffer.writable()
1557
1558    def flush(self):
1559        self.buffer.flush()
1560        self._telling = self._seekable
1561
1562    def close(self):
1563        if self.buffer is not None and not self.closed:
1564            self.flush()
1565            self.buffer.close()
1566
1567    @property
1568    def closed(self):
1569        return self.buffer.closed
1570
1571    @property
1572    def name(self):
1573        return self.buffer.name
1574
1575    def fileno(self):
1576        return self.buffer.fileno()
1577
1578    def isatty(self):
1579        return self.buffer.isatty()
1580
1581    def write(self, s):
1582        if self.closed:
1583            raise ValueError("write to closed file")
1584        if not isinstance(s, unicode):
1585            raise TypeError("can't write %s to text stream" %
1586                            s.__class__.__name__)
1587        length = len(s)
1588        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1589        if haslf and self._writetranslate and self._writenl != "\n":
1590            s = s.replace("\n", self._writenl)
1591        encoder = self._encoder or self._get_encoder()
1592        # XXX What if we were just reading?
1593        b = encoder.encode(s)
1594        self.buffer.write(b)
1595        if self._line_buffering and (haslf or "\r" in s):
1596            self.flush()
1597        self._snapshot = None
1598        if self._decoder:
1599            self._decoder.reset()
1600        return length
1601
1602    def _get_encoder(self):
1603        make_encoder = codecs.getincrementalencoder(self._encoding)
1604        self._encoder = make_encoder(self._errors)
1605        return self._encoder
1606
1607    def _get_decoder(self):
1608        make_decoder = codecs.getincrementaldecoder(self._encoding)
1609        decoder = make_decoder(self._errors)
1610        if self._readuniversal:
1611            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1612        self._decoder = decoder
1613        return decoder
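
    # Illustration (not part of the implementation): an incremental decoder
    # can be fed input in arbitrary chunks and buffers incomplete multibyte
    # sequences internally.  For the UTF-8 codec, for example:
    #
    #   d = codecs.getincrementaldecoder("utf-8")("strict")
    #   d.decode(b"\xe2\x82")   # -> u'' (incomplete sequence is buffered)
    #   d.decode(b"\xac")       # -> u'\u20ac' (EURO SIGN)
    #
    # This is why _read_chunk() below can read fixed-size chunks from the
    # buffered binary stream without worrying about character boundaries.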
1614
1615    # The following three methods implement an ADT for _decoded_chars.
1616    # Text returned from the decoder is buffered here until the client
1617    # requests it by calling our read() or readline() method.
1618    def _set_decoded_chars(self, chars):
1619        """Set the _decoded_chars buffer."""
1620        self._decoded_chars = chars
1621        self._decoded_chars_used = 0
1622
1623    def _get_decoded_chars(self, n=None):
1624        """Advance into the _decoded_chars buffer."""
1625        offset = self._decoded_chars_used
1626        if n is None:
1627            chars = self._decoded_chars[offset:]
1628        else:
1629            chars = self._decoded_chars[offset:offset + n]
1630        self._decoded_chars_used += len(chars)
1631        return chars
1632
1633    def _rewind_decoded_chars(self, n):
1634        """Rewind the _decoded_chars buffer."""
1635        if self._decoded_chars_used < n:
1636            raise AssertionError("rewind decoded_chars out of bounds")
1637        self._decoded_chars_used -= n
1638
1639    def _read_chunk(self):
1640        """
1641        Read and decode the next chunk of data from the BufferedReader.
1642        """
1643
1644        # The return value is True unless EOF was reached.  The decoded
1645        # string is placed in self._decoded_chars (replacing its previous
1646        # value).  The entire input chunk is sent to the decoder, though
1647        # some of it may remain buffered in the decoder, yet to be
1648        # converted.
1649
1650        if self._decoder is None:
1651            raise ValueError("no decoder")
1652
1653        if self._telling:
1654            # To prepare for tell(), we need to snapshot a point in the
1655            # file where the decoder's input buffer is empty.
1656
1657            dec_buffer, dec_flags = self._decoder.getstate()
1658            # Given this, we know there was a valid snapshot point
1659            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1660
1661        # Read a chunk, decode it, and put the result in self._decoded_chars.
1662        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1663        eof = not input_chunk
1664        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1665
1666        if self._telling:
1667            # At the snapshot point, len(dec_buffer) bytes before the read,
1668            # the next input to be decoded is dec_buffer + input_chunk.
1669            self._snapshot = (dec_flags, dec_buffer + input_chunk)
1670
1671        return not eof
1672
1673    def _pack_cookie(self, position, dec_flags=0,
1674                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1675        # The meaning of a tell() cookie is: seek to position, set the
1676        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1677        # into the decoder with need_eof as the EOF flag, then skip
1678        # chars_to_skip characters of the decoded result.  For most simple
1679        # decoders, tell() will often just give a byte offset in the file.
1680        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1681               (chars_to_skip<<192) | bool(need_eof)<<256)
1682
1683    def _unpack_cookie(self, bigint):
1684        rest, position = divmod(bigint, 1<<64)
1685        rest, dec_flags = divmod(rest, 1<<64)
1686        rest, bytes_to_feed = divmod(rest, 1<<64)
1687        need_eof, chars_to_skip = divmod(rest, 1<<64)
1688        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
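
    # Illustration (not part of the implementation): each field occupies its
    # own 64-bit slot, so packing and unpacking round-trip as long as every
    # field fits in 64 bits:
    #
    #   cookie = self._pack_cookie(10, dec_flags=3, bytes_to_feed=7,
    #                              need_eof=1, chars_to_skip=2)
    #   self._unpack_cookie(cookie)   # -> (10, 3, 7, 1, 2)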
1689
1690    def tell(self):
1691        if not self._seekable:
1692            raise IOError("underlying stream is not seekable")
1693        if not self._telling:
1694            raise IOError("telling position disabled by next() call")
1695        self.flush()
1696        position = self.buffer.tell()
1697        decoder = self._decoder
1698        if decoder is None or self._snapshot is None:
1699            if self._decoded_chars:
1700                # This should never happen.
1701                raise AssertionError("pending decoded text")
1702            return position
1703
1704        # Skip backward to the snapshot point (see _read_chunk).
1705        dec_flags, next_input = self._snapshot
1706        position -= len(next_input)
1707
1708        # How many decoded characters have been used up since the snapshot?
1709        chars_to_skip = self._decoded_chars_used
1710        if chars_to_skip == 0:
1711            # We haven't moved from the snapshot point.
1712            return self._pack_cookie(position, dec_flags)
1713
1714        # Starting from the snapshot position, we will walk the decoder
1715        # forward until it gives us enough decoded characters.
1716        saved_state = decoder.getstate()
1717        try:
1718            # Note our initial start point.
1719            decoder.setstate((b'', dec_flags))
1720            start_pos = position
1721            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1722            need_eof = 0
1723
1724            # Feed the decoder one byte at a time.  As we go, note the
1725            # nearest "safe start point" before the current location
1726            # (a point where the decoder has nothing buffered, so seek()
1727            # can safely start from there and advance to this location).
1728            for next_byte in next_input:
1729                bytes_fed += 1
1730                chars_decoded += len(decoder.decode(next_byte))
1731                dec_buffer, dec_flags = decoder.getstate()
1732                if not dec_buffer and chars_decoded <= chars_to_skip:
1733                    # Decoder buffer is empty, so this is a safe start point.
1734                    start_pos += bytes_fed
1735                    chars_to_skip -= chars_decoded
1736                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1737                if chars_decoded >= chars_to_skip:
1738                    break
1739            else:
1740                # We didn't get enough decoded data; signal EOF to get more.
1741                chars_decoded += len(decoder.decode(b'', final=True))
1742                need_eof = 1
1743                if chars_decoded < chars_to_skip:
1744                    raise IOError("can't reconstruct logical file position")
1745
1746            # The returned cookie corresponds to the last safe start point.
1747            return self._pack_cookie(
1748                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1749        finally:
1750            decoder.setstate(saved_state)
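
    # Illustrative usage (not part of the implementation): the value returned
    # by tell() is an opaque cookie, only meaningful to seek() on the same
    # stream.  A typical round trip, assuming a readable text stream f:
    #
    #   pos = f.tell()
    #   data = f.read(3)
    #   f.seek(pos)    # restores the position, including decoder state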
1751
1752    def truncate(self, pos=None):
1753        self.flush()
1754        if pos is None:
1755            pos = self.tell()
1756        return self.buffer.truncate(pos)
1757
1758    def detach(self):
1759        if self.buffer is None:
1760            raise ValueError("buffer is already detached")
1761        self.flush()
1762        buffer = self._buffer
1763        self._buffer = None
1764        return buffer
1765
1766    def seek(self, cookie, whence=0):
1767        if self.closed:
1768            raise ValueError("seek on closed file")
1769        if not self._seekable:
1770            raise IOError("underlying stream is not seekable")
1771        if whence == 1: # seek relative to current position
1772            if cookie != 0:
1773                raise IOError("can't do nonzero cur-relative seeks")
1774            # Seeking to the current position should attempt to
1775            # sync the underlying buffer with the current position.
1776            whence = 0
1777            cookie = self.tell()
1778        if whence == 2: # seek relative to end of file
1779            if cookie != 0:
1780                raise IOError("can't do nonzero end-relative seeks")
1781            self.flush()
1782            position = self.buffer.seek(0, 2)
1783            self._set_decoded_chars('')
1784            self._snapshot = None
1785            if self._decoder:
1786                self._decoder.reset()
1787            return position
1788        if whence != 0:
1789            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1790                             (whence,))
1791        if cookie < 0:
1792            raise ValueError("negative seek position %r" % (cookie,))
1793        self.flush()
1794
1795        # The strategy of seek() is to go back to the safe start point
1796        # and replay the effect of read(chars_to_skip) from there.
1797        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1798            self._unpack_cookie(cookie)
1799
1800        # Seek back to the safe start point.
1801        self.buffer.seek(start_pos)
1802        self._set_decoded_chars('')
1803        self._snapshot = None
1804
1805        # Restore the decoder to its state from the safe start point.
1806        if cookie == 0 and self._decoder:
1807            self._decoder.reset()
1808        elif self._decoder or dec_flags or chars_to_skip:
1809            self._decoder = self._decoder or self._get_decoder()
1810            self._decoder.setstate((b'', dec_flags))
1811            self._snapshot = (dec_flags, b'')
1812
1813        if chars_to_skip:
1814            # Just like _read_chunk, feed the decoder and save a snapshot.
1815            input_chunk = self.buffer.read(bytes_to_feed)
1816            self._set_decoded_chars(
1817                self._decoder.decode(input_chunk, need_eof))
1818            self._snapshot = (dec_flags, input_chunk)
1819
1820            # Skip chars_to_skip of the decoded characters.
1821            if len(self._decoded_chars) < chars_to_skip:
1822                raise IOError("can't restore logical file position")
1823            self._decoded_chars_used = chars_to_skip
1824
1825        # Finally, reset the encoder (this matters mainly for proper BOM handling)
1826        try:
1827            encoder = self._encoder or self._get_encoder()
1828        except LookupError:
1829            # An incremental encoder may not be available for this encoding
1830            pass
1831        else:
1832            if cookie != 0:
1833                encoder.setstate(0)
1834            else:
1835                encoder.reset()
1836        return cookie
1837
1838    def read(self, n=None):
1839        self._checkReadable()
1840        if n is None:
1841            n = -1
1842        decoder = self._decoder or self._get_decoder()
1843        try:
1844            n.__index__
1845        except AttributeError:
1846            raise TypeError("an integer is required")
1847        if n < 0:
1848            # Read everything.
1849            result = (self._get_decoded_chars() +
1850                      decoder.decode(self.buffer.read(), final=True))
1851            self._set_decoded_chars('')
1852            self._snapshot = None
1853            return result
1854        else:
1855            # Keep reading chunks until we have n characters to return.
1856            eof = False
1857            result = self._get_decoded_chars(n)
1858            while len(result) < n and not eof:
1859                eof = not self._read_chunk()
1860                result += self._get_decoded_chars(n - len(result))
1861            return result
1862
1863    def next(self):
1864        self._telling = False
1865        line = self.readline()
1866        if not line:
1867            self._snapshot = None
1868            self._telling = self._seekable
1869            raise StopIteration
1870        return line
1871
1872    def readline(self, limit=None):
1873        if self.closed:
1874            raise ValueError("read from closed file")
1875        if limit is None:
1876            limit = -1
1877        elif not isinstance(limit, (int, long)):
1878            raise TypeError("limit must be an integer")
1879
1880        # Grab all the decoded text (we will rewind any extra bits later).
1881        line = self._get_decoded_chars()
1882
1883        start = 0
1884        # Make the decoder if it doesn't already exist.
1885        if not self._decoder:
1886            self._get_decoder()
1887
1888        pos = endpos = None
1889        while True:
1890            if self._readtranslate:
1891                # Newlines are already translated, only search for \n
1892                pos = line.find('\n', start)
1893                if pos >= 0:
1894                    endpos = pos + 1
1895                    break
1896                else:
1897                    start = len(line)
1898
1899            elif self._readuniversal:
1900                # Universal newline search. Find any of \r, \r\n, \n
1901                # The decoder ensures that \r\n are not split in two pieces
1902
1903                # In C we'd look for these in parallel of course.
1904                nlpos = line.find("\n", start)
1905                crpos = line.find("\r", start)
1906                if crpos == -1:
1907                    if nlpos == -1:
1908                        # Nothing found
1909                        start = len(line)
1910                    else:
1911                        # Found \n
1912                        endpos = nlpos + 1
1913                        break
1914                elif nlpos == -1:
1915                    # Found lone \r
1916                    endpos = crpos + 1
1917                    break
1918                elif nlpos < crpos:
1919                    # Found \n
1920                    endpos = nlpos + 1
1921                    break
1922                elif nlpos == crpos + 1:
1923                    # Found \r\n
1924                    endpos = crpos + 2
1925                    break
1926                else:
1927                    # Found \r
1928                    endpos = crpos + 1
1929                    break
1930            else:
1931                # non-universal
1932                pos = line.find(self._readnl)
1933                if pos >= 0:
1934                    endpos = pos + len(self._readnl)
1935                    break
1936
1937            if limit >= 0 and len(line) >= limit:
1938                endpos = limit  # reached length limit
1939                break
1940
1941            # No line ending seen yet - get more data
1942            while self._read_chunk():
1943                if self._decoded_chars:
1944                    break
1945            if self._decoded_chars:
1946                line += self._get_decoded_chars()
1947            else:
1948                # end of file
1949                self._set_decoded_chars('')
1950                self._snapshot = None
1951                return line
1952
1953        if limit >= 0 and endpos > limit:
1954            endpos = limit  # don't exceed limit
1955
1956        # Rewind _decoded_chars to just after the line ending we found.
1957        self._rewind_decoded_chars(len(line) - endpos)
1958        return line[:endpos]
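
    # Illustrative behaviour (not part of the implementation): with newline
    # translation enabled (newline=None), a stream whose raw contents are
    # b"a\r\nb" is returned by successive readline() calls as u'a\n', then
    # u'b', then u'' at end of file.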
1959
1960    @property
1961    def newlines(self):
1962        return self._decoder.newlines if self._decoder else None
1963
1964
1965class StringIO(TextIOWrapper):
1966    """Text I/O implementation using an in-memory buffer.
1967
1968    The initial_value argument sets the initial value of the object.  The
1969    newline argument works like that of TextIOWrapper's constructor.
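
    For example (an illustrative session)::

        >>> s = StringIO(u"hello world")
        >>> s.read(5)
        u'hello'
        >>> s.getvalue()
        u'hello world'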
1970    """
1971
1972    def __init__(self, initial_value="", newline="\n"):
1973        super(StringIO, self).__init__(BytesIO(),
1974                                       encoding="utf-8",
1975                                       errors="strict",
1976                                       newline=newline)
1977        # Issue #5645: make universal newlines semantics the same as in the
1978        # C version, even under Windows.
1979        if newline is None:
1980            self._writetranslate = False
1981        if initial_value:
1982            if not isinstance(initial_value, unicode):
1983                initial_value = unicode(initial_value)
1984            self.write(initial_value)
1985            self.seek(0)
1986
1987    def getvalue(self):
1988        self.flush()
1989        return self.buffer.getvalue().decode(self._encoding, self._errors)
1990
1991    def __repr__(self):
1992        # TextIOWrapper reports the encoding in its repr. In StringIO,
1993        # that's an implementation detail.
1994        return object.__repr__(self)
1995
1996    @property
1997    def errors(self):
1998        return None
1999
2000    @property
2001    def encoding(self):
2002        return None
2003
2004    def detach(self):
2005        # This doesn't make sense on StringIO.
2006        self._unsupported("detach")
2007