1"""
2Python implementation of the io module.
3"""
4
5from __future__ import (print_function, unicode_literals)
6
7import os
8import abc
9import codecs
10import warnings
11import errno
12# Import thread instead of threading to reduce startup cost
13try:
14    from thread import allocate_lock as Lock
15except ImportError:
16    from dummy_thread import allocate_lock as Lock
17
18import io
19from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
20from errno import EINTR
21
22__metaclass__ = type
23
24# open() uses st_blksize whenever we can
25DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
26
27# NOTE: Base classes defined here are registered with the "official" ABCs
28# defined in io.py. We don't use real inheritance though, because we don't
29# want to inherit the C implementations.
30
31
32class BlockingIOError(IOError):
33
34    """Exception raised when I/O would block on a non-blocking I/O stream."""
35
36    def __init__(self, errno, strerror, characters_written=0):
37        super(IOError, self).__init__(errno, strerror)
38        if not isinstance(characters_written, (int, long)):
39            raise TypeError("characters_written must be a integer")
40        self.characters_written = characters_written
41
42
43def open(file, mode="r", buffering=-1,
44         encoding=None, errors=None,
45         newline=None, closefd=True):
46
47    r"""Open file and return a stream.  Raise IOError upon failure.
48
49    file is either a text or byte string giving the name (and the path
50    if the file isn't in the current working directory) of the file to
51    be opened or an integer file descriptor of the file to be
52    wrapped. (If a file descriptor is given, it is closed when the
53    returned I/O object is closed, unless closefd is set to False.)
54
55    mode is an optional string that specifies the mode in which the file
56    is opened. It defaults to 'r' which means open for reading in text
57    mode.  Other common values are 'w' for writing (truncating the file if
58    it already exists), and 'a' for appending (which on some Unix systems,
59    means that all writes append to the end of the file regardless of the
60    current seek position). In text mode, if encoding is not specified the
61    encoding used is platform dependent. (For reading and writing raw
62    bytes use binary mode and leave encoding unspecified.) The available
63    modes are:
64
65    ========= ===============================================================
66    Character Meaning
67    --------- ---------------------------------------------------------------
68    'r'       open for reading (default)
69    'w'       open for writing, truncating the file first
70    'a'       open for writing, appending to the end of the file if it exists
71    'b'       binary mode
72    't'       text mode (default)
73    '+'       open a disk file for updating (reading and writing)
74    'U'       universal newline mode (for backwards compatibility; unneeded
75              for new code)
76    ========= ===============================================================
77
78    The default mode is 'rt' (open for reading text). For binary random
79    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
80    'r+b' opens the file without truncation.
81
82    Python distinguishes between files opened in binary and text modes,
83    even when the underlying operating system doesn't. Files opened in
84    binary mode (appending 'b' to the mode argument) return contents as
85    bytes objects without any decoding. In text mode (the default, or when
86    't' is appended to the mode argument), the contents of the file are
87    returned as strings, the bytes having been first decoded using a
88    platform-dependent encoding or using the specified encoding if given.
89
90    buffering is an optional integer used to set the buffering policy.
91    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
92    line buffering (only usable in text mode), and an integer > 1 to indicate
93    the size of a fixed-size chunk buffer.  When no buffering argument is
94    given, the default buffering policy works as follows:
95
96    * Binary files are buffered in fixed-size chunks; the size of the buffer
97      is chosen using a heuristic trying to determine the underlying device's
98      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
99      On many systems, the buffer will typically be 4096 or 8192 bytes long.
100
101    * "Interactive" text files (files for which isatty() returns True)
102      use line buffering.  Other text files use the policy described above
103      for binary files.
104
105    encoding is the name of the encoding used to decode or encode the
106    file. This should only be used in text mode. The default encoding is
107    platform dependent, but any encoding supported by Python can be
108    passed.  See the codecs module for the list of supported encodings.
109
110    errors is an optional string that specifies how encoding errors are to
111    be handled---this argument should not be used in binary mode. Pass
112    'strict' to raise a ValueError exception if there is an encoding error
113    (the default of None has the same effect), or pass 'ignore' to ignore
114    errors. (Note that ignoring encoding errors can lead to data loss.)
115    See the documentation for codecs.register for a list of the permitted
116    encoding error strings.
117
118    newline controls how universal newlines works (it only applies to text
119    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
120    follows:
121
122    * On input, if newline is None, universal newlines mode is
123      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
124      these are translated into '\n' before being returned to the
125      caller. If it is '', universal newline mode is enabled, but line
126      endings are returned to the caller untranslated. If it has any of
127      the other legal values, input lines are only terminated by the given
128      string, and the line ending is returned to the caller untranslated.
129
130    * On output, if newline is None, any '\n' characters written are
131      translated to the system default line separator, os.linesep. If
132      newline is '', no translation takes place. If newline is any of the
133      other legal values, any '\n' characters written are translated to
134      the given string.
135
136    If closefd is False, the underlying file descriptor will be kept open
137    when the file is closed. This does not work when a file name is given
138    and must be True in that case.
139
140    open() returns a file object whose type depends on the mode, and
141    through which the standard file operations such as reading and writing
142    are performed. When open() is used to open a file in a text mode ('w',
143    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
144    a file in a binary mode, the returned class varies: in read binary
145    mode, it returns a BufferedReader; in write binary and append binary
146    modes, it returns a BufferedWriter, and in read/write mode, it returns
147    a BufferedRandom.
148
149    It is also possible to use a string or bytearray as a file for both
150    reading and writing. For strings StringIO can be used like a file
151    opened in a text mode, and for bytes a BytesIO can be used like a file
152    opened in a binary mode.
153    """
154    if not isinstance(file, (basestring, int, long)):
155        raise TypeError("invalid file: %r" % file)
156    if not isinstance(mode, basestring):
157        raise TypeError("invalid mode: %r" % mode)
158    if not isinstance(buffering, (int, long)):
159        raise TypeError("invalid buffering: %r" % buffering)
160    if encoding is not None and not isinstance(encoding, basestring):
161        raise TypeError("invalid encoding: %r" % encoding)
162    if errors is not None and not isinstance(errors, basestring):
163        raise TypeError("invalid errors: %r" % errors)
164    modes = set(mode)
165    if modes - set("arwb+tU") or len(mode) > len(modes):
166        raise ValueError("invalid mode: %r" % mode)
167    reading = "r" in modes
168    writing = "w" in modes
169    appending = "a" in modes
170    updating = "+" in modes
171    text = "t" in modes
172    binary = "b" in modes
173    if "U" in modes:
174        if writing or appending:
175            raise ValueError("can't use U and writing mode at once")
176        reading = True
177    if text and binary:
178        raise ValueError("can't have text and binary mode at once")
179    if reading + writing + appending > 1:
180        raise ValueError("can't have read/write/append mode at once")
181    if not (reading or writing or appending):
182        raise ValueError("must have exactly one of read/write/append mode")
183    if binary and encoding is not None:
184        raise ValueError("binary mode doesn't take an encoding argument")
185    if binary and errors is not None:
186        raise ValueError("binary mode doesn't take an errors argument")
187    if binary and newline is not None:
188        raise ValueError("binary mode doesn't take a newline argument")
189    raw = FileIO(file,
190                 (reading and "r" or "") +
191                 (writing and "w" or "") +
192                 (appending and "a" or "") +
193                 (updating and "+" or ""),
194                 closefd)
195    line_buffering = False
196    if buffering == 1 or buffering < 0 and raw.isatty():
197        buffering = -1
198        line_buffering = True
199    if buffering < 0:
200        buffering = DEFAULT_BUFFER_SIZE
201        try:
202            bs = os.fstat(raw.fileno()).st_blksize
203        except (os.error, AttributeError):
204            pass
205        else:
206            if bs > 1:
207                buffering = bs
208    if buffering < 0:
209        raise ValueError("invalid buffering size")
210    if buffering == 0:
211        if binary:
212            return raw
213        raise ValueError("can't have unbuffered text I/O")
214    if updating:
215        buffer = BufferedRandom(raw, buffering)
216    elif writing or appending:
217        buffer = BufferedWriter(raw, buffering)
218    elif reading:
219        buffer = BufferedReader(raw, buffering)
220    else:
221        raise ValueError("unknown mode: %r" % mode)
222    if binary:
223        return buffer
224    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
225    text.mode = mode
226    return text
227
228
229class DocDescriptor:
230    """Helper for builtins.open.__doc__
231    """
232    def __get__(self, obj, typ):
233        return (
234            "open(file, mode='r', buffering=-1, encoding=None, "
235                 "errors=None, newline=None, closefd=True)\n\n" +
236            open.__doc__)
237
238class OpenWrapper:
239    """Wrapper for builtins.open
240
241    Trick so that open won't become a bound method when stored
242    as a class variable (as dbm.dumb does).
243
244    See initstdio() in Python/pythonrun.c.
245    """
246    __doc__ = DocDescriptor()
247
248    def __new__(cls, *args, **kwargs):
249        return open(*args, **kwargs)
250
251
252class UnsupportedOperation(ValueError, IOError):
253    pass
254
255
256class IOBase:
257    __metaclass__ = abc.ABCMeta
258
259    """The abstract base class for all I/O classes, acting on streams of
260    bytes. There is no public constructor.
261
262    This class provides dummy implementations for many methods that
263    derived classes can override selectively; the default implementations
264    represent a file that cannot be read, written or seeked.
265
266    Even though IOBase does not declare read, readinto, or write because
267    their signatures will vary, implementations and clients should
268    consider those methods part of the interface. Also, implementations
269    may raise a IOError when operations they do not support are called.
270
271    The basic type used for binary data read from or written to a file is
272    bytes. bytearrays are accepted too, and in some cases (such as
273    readinto) needed. Text I/O classes work with str data.
274
275    Note that calling any method (even inquiries) on a closed stream is
276    undefined. Implementations may raise IOError in this case.
277
278    IOBase (and its subclasses) support the iterator protocol, meaning
279    that an IOBase object can be iterated over yielding the lines in a
280    stream.
281
282    IOBase also supports the :keyword:`with` statement. In this example,
283    fp is closed after the suite of the with statement is complete:
284
285    with open('spam.txt', 'r') as fp:
286        fp.write('Spam and eggs!')
287    """
288
289    ### Internal ###
290
291    def _unsupported(self, name):
292        """Internal: raise an exception for unsupported operations."""
293        raise UnsupportedOperation("%s.%s() not supported" %
294                                   (self.__class__.__name__, name))
295
296    ### Positioning ###
297
298    def seek(self, pos, whence=0):
299        """Change stream position.
300
301        Change the stream position to byte offset pos. Argument pos is
302        interpreted relative to the position indicated by whence.  Values
303        for whence are:
304
305        * 0 -- start of stream (the default); offset should be zero or positive
306        * 1 -- current stream position; offset may be negative
307        * 2 -- end of stream; offset is usually negative
308
309        Return the new absolute position.
310        """
311        self._unsupported("seek")
312
313    def tell(self):
314        """Return current stream position."""
315        return self.seek(0, 1)
316
317    def truncate(self, pos=None):
318        """Truncate file to size bytes.
319
320        Size defaults to the current IO position as reported by tell().  Return
321        the new size.
322        """
323        self._unsupported("truncate")
324
325    ### Flush and close ###
326
327    def flush(self):
328        """Flush write buffers, if applicable.
329
330        This is not implemented for read-only and non-blocking streams.
331        """
332        self._checkClosed()
333        # XXX Should this return the number of bytes written???
334
335    __closed = False
336
337    def close(self):
338        """Flush and close the IO object.
339
340        This method has no effect if the file is already closed.
341        """
342        if not self.__closed:
343            try:
344                self.flush()
345            finally:
346                self.__closed = True
347
348    def __del__(self):
349        """Destructor.  Calls close()."""
350        # The try/except block is in case this is called at program
351        # exit time, when it's possible that globals have already been
352        # deleted, and then the close() call might fail.  Since
353        # there's nothing we can do about such failures and they annoy
354        # the end users, we suppress the traceback.
355        try:
356            self.close()
357        except:
358            pass
359
360    ### Inquiries ###
361
362    def seekable(self):
363        """Return whether object supports random access.
364
365        If False, seek(), tell() and truncate() will raise IOError.
366        This method may need to do a test seek().
367        """
368        return False
369
370    def _checkSeekable(self, msg=None):
371        """Internal: raise an IOError if file is not seekable
372        """
373        if not self.seekable():
374            raise IOError("File or stream is not seekable."
375                          if msg is None else msg)
376
377
378    def readable(self):
379        """Return whether object was opened for reading.
380
381        If False, read() will raise IOError.
382        """
383        return False
384
385    def _checkReadable(self, msg=None):
386        """Internal: raise an IOError if file is not readable
387        """
388        if not self.readable():
389            raise IOError("File or stream is not readable."
390                          if msg is None else msg)
391
392    def writable(self):
393        """Return whether object was opened for writing.
394
395        If False, write() and truncate() will raise IOError.
396        """
397        return False
398
399    def _checkWritable(self, msg=None):
400        """Internal: raise an IOError if file is not writable
401        """
402        if not self.writable():
403            raise IOError("File or stream is not writable."
404                          if msg is None else msg)
405
406    @property
407    def closed(self):
408        """closed: bool.  True iff the file has been closed.
409
410        For backwards compatibility, this is a property, not a predicate.
411        """
412        return self.__closed
413
414    def _checkClosed(self, msg=None):
415        """Internal: raise an ValueError if file is closed
416        """
417        if self.closed:
418            raise ValueError("I/O operation on closed file."
419                             if msg is None else msg)
420
421    ### Context manager ###
422
423    def __enter__(self):
424        """Context management protocol.  Returns self."""
425        self._checkClosed()
426        return self
427
428    def __exit__(self, *args):
429        """Context management protocol.  Calls close()"""
430        self.close()
431
432    ### Lower-level APIs ###
433
434    # XXX Should these be present even if unimplemented?
435
436    def fileno(self):
437        """Returns underlying file descriptor if one exists.
438
439        An IOError is raised if the IO object does not use a file descriptor.
440        """
441        self._unsupported("fileno")
442
443    def isatty(self):
444        """Return whether this is an 'interactive' stream.
445
446        Return False if it can't be determined.
447        """
448        self._checkClosed()
449        return False
450
451    ### Readline[s] and writelines ###
452
453    def readline(self, limit=-1):
454        r"""Read and return a line from the stream.
455
456        If limit is specified, at most limit bytes will be read.
457
458        The line terminator is always b'\n' for binary files; for text
459        files, the newlines argument to open can be used to select the line
460        terminator(s) recognized.
461        """
462        # For backwards compatibility, a (slowish) readline().
463        if hasattr(self, "peek"):
464            def nreadahead():
465                readahead = self.peek(1)
466                if not readahead:
467                    return 1
468                n = (readahead.find(b"\n") + 1) or len(readahead)
469                if limit >= 0:
470                    n = min(n, limit)
471                return n
472        else:
473            def nreadahead():
474                return 1
475        if limit is None:
476            limit = -1
477        elif not isinstance(limit, (int, long)):
478            raise TypeError("limit must be an integer")
479        res = bytearray()
480        while limit < 0 or len(res) < limit:
481            b = self.read(nreadahead())
482            if not b:
483                break
484            res += b
485            if res.endswith(b"\n"):
486                break
487        return bytes(res)
488
489    def __iter__(self):
490        self._checkClosed()
491        return self
492
493    def next(self):
494        line = self.readline()
495        if not line:
496            raise StopIteration
497        return line
498
499    def readlines(self, hint=None):
500        """Return a list of lines from the stream.
501
502        hint can be specified to control the number of lines read: no more
503        lines will be read if the total size (in bytes/characters) of all
504        lines so far exceeds hint.
505        """
506        if hint is not None and not isinstance(hint, (int, long)):
507            raise TypeError("integer or None expected")
508        if hint is None or hint <= 0:
509            return list(self)
510        n = 0
511        lines = []
512        for line in self:
513            lines.append(line)
514            n += len(line)
515            if n >= hint:
516                break
517        return lines
518
519    def writelines(self, lines):
520        self._checkClosed()
521        for line in lines:
522            self.write(line)
523
524io.IOBase.register(IOBase)
525
526
527class RawIOBase(IOBase):
528
529    """Base class for raw binary I/O."""
530
531    # The read() method is implemented by calling readinto(); derived
532    # classes that want to support read() only need to implement
533    # readinto() as a primitive operation.  In general, readinto() can be
534    # more efficient than read().
535
536    # (It would be tempting to also provide an implementation of
537    # readinto() in terms of read(), in case the latter is a more suitable
538    # primitive operation, but that would lead to nasty recursion in case
539    # a subclass doesn't implement either.)
540
541    def read(self, n=-1):
542        """Read and return up to n bytes.
543
544        Returns an empty bytes object on EOF, or None if the object is
545        set not to block and has no data to read.
546        """
547        if n is None:
548            n = -1
549        if n < 0:
550            return self.readall()
551        b = bytearray(n.__index__())
552        n = self.readinto(b)
553        if n is None:
554            return None
555        del b[n:]
556        return bytes(b)
557
558    def readall(self):
559        """Read until EOF, using multiple read() call."""
560        res = bytearray()
561        while True:
562            data = self.read(DEFAULT_BUFFER_SIZE)
563            if not data:
564                break
565            res += data
566        if res:
567            return bytes(res)
568        else:
569            # b'' or None
570            return data
571
572    def readinto(self, b):
573        """Read up to len(b) bytes into b.
574
575        Returns number of bytes read (0 for EOF), or None if the object
576        is set not to block and has no data to read.
577        """
578        self._unsupported("readinto")
579
580    def write(self, b):
581        """Write the given buffer to the IO stream.
582
583        Returns the number of bytes written, which may be less than len(b).
584        """
585        self._unsupported("write")
586
587io.RawIOBase.register(RawIOBase)
588from _io import FileIO
589RawIOBase.register(FileIO)
590
591
592class BufferedIOBase(IOBase):
593
594    """Base class for buffered IO objects.
595
596    The main difference with RawIOBase is that the read() method
597    supports omitting the size argument, and does not have a default
598    implementation that defers to readinto().
599
600    In addition, read(), readinto() and write() may raise
601    BlockingIOError if the underlying raw stream is in non-blocking
602    mode and not ready; unlike their raw counterparts, they will never
603    return None.
604
605    A typical implementation should not inherit from a RawIOBase
606    implementation, but wrap one.
607    """
608
609    def read(self, n=None):
610        """Read and return up to n bytes.
611
612        If the argument is omitted, None, or negative, reads and
613        returns all data until EOF.
614
615        If the argument is positive, and the underlying raw stream is
616        not 'interactive', multiple raw reads may be issued to satisfy
617        the byte count (unless EOF is reached first).  But for
618        interactive raw streams (XXX and for pipes?), at most one raw
619        read will be issued, and a short result does not imply that
620        EOF is imminent.
621
622        Returns an empty bytes array on EOF.
623
624        Raises BlockingIOError if the underlying raw stream has no
625        data at the moment.
626        """
627        self._unsupported("read")
628
629    def read1(self, n=None):
630        """Read up to n bytes with at most one read() system call."""
631        self._unsupported("read1")
632
633    def readinto(self, b):
634        """Read up to len(b) bytes into b.
635
636        Like read(), this may issue multiple reads to the underlying raw
637        stream, unless the latter is 'interactive'.
638
639        Returns the number of bytes read (0 for EOF).
640
641        Raises BlockingIOError if the underlying raw stream has no
642        data at the moment.
643        """
644        # XXX This ought to work with anything that supports the buffer API
645        data = self.read(len(b))
646        n = len(data)
647        try:
648            b[:n] = data
649        except TypeError as err:
650            import array
651            if not isinstance(b, array.array):
652                raise err
653            b[:n] = array.array(b'b', data)
654        return n
655
656    def write(self, b):
657        """Write the given buffer to the IO stream.
658
659        Return the number of bytes written, which is never less than
660        len(b).
661
662        Raises BlockingIOError if the buffer is full and the
663        underlying raw stream cannot accept more data at the moment.
664        """
665        self._unsupported("write")
666
667    def detach(self):
668        """
669        Separate the underlying raw stream from the buffer and return it.
670
671        After the raw stream has been detached, the buffer is in an unusable
672        state.
673        """
674        self._unsupported("detach")
675
676io.BufferedIOBase.register(BufferedIOBase)
677
678
679class _BufferedIOMixin(BufferedIOBase):
680
681    """A mixin implementation of BufferedIOBase with an underlying raw stream.
682
683    This passes most requests on to the underlying raw stream.  It
684    does *not* provide implementations of read(), readinto() or
685    write().
686    """
687
688    def __init__(self, raw):
689        self._raw = raw
690
691    ### Positioning ###
692
693    def seek(self, pos, whence=0):
694        new_position = self.raw.seek(pos, whence)
695        if new_position < 0:
696            raise IOError("seek() returned an invalid position")
697        return new_position
698
699    def tell(self):
700        pos = self.raw.tell()
701        if pos < 0:
702            raise IOError("tell() returned an invalid position")
703        return pos
704
705    def truncate(self, pos=None):
706        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
707        # and a flush may be necessary to synch both views of the current
708        # file state.
709        self.flush()
710
711        if pos is None:
712            pos = self.tell()
713        # XXX: Should seek() be used, instead of passing the position
714        # XXX  directly to truncate?
715        return self.raw.truncate(pos)
716
717    ### Flush and close ###
718
719    def flush(self):
720        if self.closed:
721            raise ValueError("flush of closed file")
722        self.raw.flush()
723
724    def close(self):
725        if self.raw is not None and not self.closed:
726            try:
727                # may raise BlockingIOError or BrokenPipeError etc
728                self.flush()
729            finally:
730                self.raw.close()
731
732    def detach(self):
733        if self.raw is None:
734            raise ValueError("raw stream already detached")
735        self.flush()
736        raw = self._raw
737        self._raw = None
738        return raw
739
740    ### Inquiries ###
741
742    def seekable(self):
743        return self.raw.seekable()
744
745    def readable(self):
746        return self.raw.readable()
747
748    def writable(self):
749        return self.raw.writable()
750
751    @property
752    def raw(self):
753        return self._raw
754
755    @property
756    def closed(self):
757        return self.raw.closed
758
759    @property
760    def name(self):
761        return self.raw.name
762
763    @property
764    def mode(self):
765        return self.raw.mode
766
767    def __repr__(self):
768        clsname = self.__class__.__name__
769        try:
770            name = self.name
771        except AttributeError:
772            return "<_pyio.{0}>".format(clsname)
773        else:
774            return "<_pyio.{0} name={1!r}>".format(clsname, name)
775
776    ### Lower-level APIs ###
777
778    def fileno(self):
779        return self.raw.fileno()
780
781    def isatty(self):
782        return self.raw.isatty()
783
784
785class BytesIO(BufferedIOBase):
786
787    """Buffered I/O implementation using an in-memory bytes buffer."""
788
789    def __init__(self, initial_bytes=None):
790        buf = bytearray()
791        if initial_bytes is not None:
792            buf.extend(initial_bytes)
793        self._buffer = buf
794        self._pos = 0
795
796    def __getstate__(self):
797        if self.closed:
798            raise ValueError("__getstate__ on closed file")
799        return self.__dict__.copy()
800
801    def getvalue(self):
802        """Return the bytes value (contents) of the buffer
803        """
804        if self.closed:
805            raise ValueError("getvalue on closed file")
806        return bytes(self._buffer)
807
808    def read(self, n=None):
809        if self.closed:
810            raise ValueError("read from closed file")
811        if n is None:
812            n = -1
813        if not isinstance(n, (int, long)):
814            raise TypeError("integer argument expected, got {0!r}".format(
815                type(n)))
816        if n < 0:
817            n = len(self._buffer)
818        if len(self._buffer) <= self._pos:
819            return b""
820        newpos = min(len(self._buffer), self._pos + n)
821        b = self._buffer[self._pos : newpos]
822        self._pos = newpos
823        return bytes(b)
824
825    def read1(self, n):
826        """This is the same as read.
827        """
828        return self.read(n)
829
830    def write(self, b):
831        if self.closed:
832            raise ValueError("write to closed file")
833        if isinstance(b, unicode):
834            raise TypeError("can't write unicode to binary stream")
835        n = len(b)
836        if n == 0:
837            return 0
838        pos = self._pos
839        if pos > len(self._buffer):
840            # Inserts null bytes between the current end of the file
841            # and the new write position.
842            padding = b'\x00' * (pos - len(self._buffer))
843            self._buffer += padding
844        self._buffer[pos:pos + n] = b
845        self._pos += n
846        return n
847
848    def seek(self, pos, whence=0):
849        if self.closed:
850            raise ValueError("seek on closed file")
851        try:
852            pos.__index__
853        except AttributeError:
854            raise TypeError("an integer is required")
855        if whence == 0:
856            if pos < 0:
857                raise ValueError("negative seek position %r" % (pos,))
858            self._pos = pos
859        elif whence == 1:
860            self._pos = max(0, self._pos + pos)
861        elif whence == 2:
862            self._pos = max(0, len(self._buffer) + pos)
863        else:
864            raise ValueError("invalid whence value")
865        return self._pos
866
867    def tell(self):
868        if self.closed:
869            raise ValueError("tell on closed file")
870        return self._pos
871
872    def truncate(self, pos=None):
873        if self.closed:
874            raise ValueError("truncate on closed file")
875        if pos is None:
876            pos = self._pos
877        else:
878            try:
879                pos.__index__
880            except AttributeError:
881                raise TypeError("an integer is required")
882            if pos < 0:
883                raise ValueError("negative truncate position %r" % (pos,))
884        del self._buffer[pos:]
885        return pos
886
887    def readable(self):
888        if self.closed:
889            raise ValueError("I/O operation on closed file.")
890        return True
891
892    def writable(self):
893        if self.closed:
894            raise ValueError("I/O operation on closed file.")
895        return True
896
897    def seekable(self):
898        if self.closed:
899            raise ValueError("I/O operation on closed file.")
900        return True
901
902
903class BufferedReader(_BufferedIOMixin):
904
905    """BufferedReader(raw[, buffer_size])
906
907    A buffer for a readable, sequential BaseRawIO object.
908
909    The constructor creates a BufferedReader for the given readable raw
910    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
911    is used.
912    """
913
914    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
915        """Create a new buffered reader using the given readable raw IO object.
916        """
917        if not raw.readable():
918            raise IOError('"raw" argument must be readable.')
919
920        _BufferedIOMixin.__init__(self, raw)
921        if buffer_size <= 0:
922            raise ValueError("invalid buffer size")
923        self.buffer_size = buffer_size
924        self._reset_read_buf()
925        self._read_lock = Lock()
926
927    def _reset_read_buf(self):
928        self._read_buf = b""
929        self._read_pos = 0
930
931    def read(self, n=None):
932        """Read n bytes.
933
934        Returns exactly n bytes of data unless the underlying raw IO
935        stream reaches EOF or if the call would block in non-blocking
936        mode. If n is negative, read until EOF or until read() would
937        block.
938        """
939        if n is not None and n < -1:
940            raise ValueError("invalid number of bytes to read")
941        with self._read_lock:
942            return self._read_unlocked(n)
943
944    def _read_unlocked(self, n=None):
945        nodata_val = b""
946        empty_values = (b"", None)
947        buf = self._read_buf
948        pos = self._read_pos
949
950        # Special case for when the number of bytes to read is unspecified.
951        if n is None or n == -1:
952            self._reset_read_buf()
953            chunks = [buf[pos:]]  # Strip the consumed bytes.
954            current_size = 0
955            while True:
956                # Read until EOF or until read() would block.
957                try:
958                    chunk = self.raw.read()
959                except IOError as e:
960                    if e.errno != EINTR:
961                        raise
962                    continue
963                if chunk in empty_values:
964                    nodata_val = chunk
965                    break
966                current_size += len(chunk)
967                chunks.append(chunk)
968            return b"".join(chunks) or nodata_val
969
970        # The number of bytes to read is specified, return at most n bytes.
971        avail = len(buf) - pos  # Length of the available buffered data.
972        if n <= avail:
973            # Fast path: the data to read is fully buffered.
974            self._read_pos += n
975            return buf[pos:pos+n]
976        # Slow path: read from the stream until enough bytes are read,
977        # or until an EOF occurs or until read() would block.
978        chunks = [buf[pos:]]
979        wanted = max(self.buffer_size, n)
980        while avail < n:
981            try:
982                chunk = self.raw.read(wanted)
983            except IOError as e:
984                if e.errno != EINTR:
985                    raise
986                continue
987            if chunk in empty_values:
988                nodata_val = chunk
989                break
990            avail += len(chunk)
991            chunks.append(chunk)
992        # n is more then avail only when an EOF occurred or when
993        # read() would have blocked.
994        n = min(n, avail)
995        out = b"".join(chunks)
996        self._read_buf = out[n:]  # Save the extra data in the buffer.
997        self._read_pos = 0
998        return out[:n] if out else nodata_val
999
1000    def peek(self, n=0):
1001        """Returns buffered bytes without advancing the position.
1002
1003        The argument indicates a desired minimal number of bytes; we
1004        do at most one raw read to satisfy it.  We never return more
1005        than self.buffer_size.
1006        """
1007        with self._read_lock:
1008            return self._peek_unlocked(n)
1009
1010    def _peek_unlocked(self, n=0):
1011        want = min(n, self.buffer_size)
1012        have = len(self._read_buf) - self._read_pos
1013        if have < want or have <= 0:
1014            to_read = self.buffer_size - have
1015            while True:
1016                try:
1017                    current = self.raw.read(to_read)
1018                except IOError as e:
1019                    if e.errno != EINTR:
1020                        raise
1021                    continue
1022                break
1023            if current:
1024                self._read_buf = self._read_buf[self._read_pos:] + current
1025                self._read_pos = 0
1026        return self._read_buf[self._read_pos:]
1027
1028    def read1(self, n):
1029        """Reads up to n bytes, with at most one read() system call."""
1030        # Returns up to n bytes.  If at least one byte is buffered, we
1031        # only return buffered bytes.  Otherwise, we do one raw read.
1032        if n < 0:
1033            raise ValueError("number of bytes to read must be positive")
1034        if n == 0:
1035            return b""
1036        with self._read_lock:
1037            self._peek_unlocked(1)
1038            return self._read_unlocked(
1039                min(n, len(self._read_buf) - self._read_pos))
1040
1041    def tell(self):
1042        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1043
1044    def seek(self, pos, whence=0):
1045        if not (0 <= whence <= 2):
1046            raise ValueError("invalid whence value")
1047        with self._read_lock:
1048            if whence == 1:
1049                pos -= len(self._read_buf) - self._read_pos
1050            pos = _BufferedIOMixin.seek(self, pos, whence)
1051            self._reset_read_buf()
1052            return pos
1053
1054class BufferedWriter(_BufferedIOMixin):
1055
1056    """A buffer for a writeable sequential RawIO object.
1057
1058    The constructor creates a BufferedWriter for the given writeable raw
1059    stream. If the buffer_size is not given, it defaults to
1060    DEFAULT_BUFFER_SIZE.
1061    """
1062
1063    _warning_stack_offset = 2
1064
1065    def __init__(self, raw,
1066                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1067        if not raw.writable():
1068            raise IOError('"raw" argument must be writable.')
1069
1070        _BufferedIOMixin.__init__(self, raw)
1071        if buffer_size <= 0:
1072            raise ValueError("invalid buffer size")
1073        if max_buffer_size is not None:
1074            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1075                          self._warning_stack_offset)
1076        self.buffer_size = buffer_size
1077        self._write_buf = bytearray()
1078        self._write_lock = Lock()
1079
1080    def write(self, b):
1081        if self.closed:
1082            raise ValueError("write to closed file")
1083        if isinstance(b, unicode):
1084            raise TypeError("can't write unicode to binary stream")
1085        with self._write_lock:
1086            # XXX we can implement some more tricks to try and avoid
1087            # partial writes
1088            if len(self._write_buf) > self.buffer_size:
1089                # We're full, so let's pre-flush the buffer.  (This may
1090                # raise BlockingIOError with characters_written == 0.)
1091                self._flush_unlocked()
1092            before = len(self._write_buf)
1093            self._write_buf.extend(b)
1094            written = len(self._write_buf) - before
1095            if len(self._write_buf) > self.buffer_size:
1096                try:
1097                    self._flush_unlocked()
1098                except BlockingIOError as e:
1099                    if len(self._write_buf) > self.buffer_size:
1100                        # We've hit the buffer_size. We have to accept a partial
1101                        # write and cut back our buffer.
1102                        overage = len(self._write_buf) - self.buffer_size
1103                        written -= overage
1104                        self._write_buf = self._write_buf[:self.buffer_size]
1105                        raise BlockingIOError(e.errno, e.strerror, written)
1106            return written
1107
1108    def truncate(self, pos=None):
1109        with self._write_lock:
1110            self._flush_unlocked()
1111            if pos is None:
1112                pos = self.raw.tell()
1113            return self.raw.truncate(pos)
1114
1115    def flush(self):
1116        with self._write_lock:
1117            self._flush_unlocked()
1118
1119    def _flush_unlocked(self):
1120        if self.closed:
1121            raise ValueError("flush of closed file")
1122        while self._write_buf:
1123            try:
1124                n = self.raw.write(self._write_buf)
1125            except BlockingIOError:
1126                raise RuntimeError("self.raw should implement RawIOBase: it "
1127                                   "should not raise BlockingIOError")
1128            except IOError as e:
1129                if e.errno != EINTR:
1130                    raise
1131                continue
1132            if n is None:
1133                raise BlockingIOError(
1134                    errno.EAGAIN,
1135                    "write could not complete without blocking", 0)
1136            if n > len(self._write_buf) or n < 0:
1137                raise IOError("write() returned incorrect number of bytes")
1138            del self._write_buf[:n]
1139
1140    def tell(self):
1141        return _BufferedIOMixin.tell(self) + len(self._write_buf)
1142
1143    def seek(self, pos, whence=0):
1144        if not (0 <= whence <= 2):
1145            raise ValueError("invalid whence")
1146        with self._write_lock:
1147            self._flush_unlocked()
1148            return _BufferedIOMixin.seek(self, pos, whence)
1149
1150
1151class BufferedRWPair(BufferedIOBase):
1152
1153    """A buffered reader and writer object together.
1154
1155    A buffered reader object and buffered writer object put together to
1156    form a sequential IO object that can read and write. This is typically
1157    used with a socket or two-way pipe.
1158
1159    reader and writer are RawIOBase objects that are readable and
1160    writeable respectively. If the buffer_size is omitted it defaults to
1161    DEFAULT_BUFFER_SIZE.
1162    """
1163
1164    # XXX The usefulness of this (compared to having two separate IO
1165    # objects) is questionable.
1166
1167    def __init__(self, reader, writer,
1168                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1169        """Constructor.
1170
1171        The arguments are two RawIO instances.
1172        """
1173        if max_buffer_size is not None:
1174            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
1175
1176        if not reader.readable():
1177            raise IOError('"reader" argument must be readable.')
1178
1179        if not writer.writable():
1180            raise IOError('"writer" argument must be writable.')
1181
1182        self.reader = BufferedReader(reader, buffer_size)
1183        self.writer = BufferedWriter(writer, buffer_size)
1184
1185    def read(self, n=None):
1186        if n is None:
1187            n = -1
1188        return self.reader.read(n)
1189
1190    def readinto(self, b):
1191        return self.reader.readinto(b)
1192
1193    def write(self, b):
1194        return self.writer.write(b)
1195
1196    def peek(self, n=0):
1197        return self.reader.peek(n)
1198
1199    def read1(self, n):
1200        return self.reader.read1(n)
1201
1202    def readable(self):
1203        return self.reader.readable()
1204
1205    def writable(self):
1206        return self.writer.writable()
1207
1208    def flush(self):
1209        return self.writer.flush()
1210
1211    def close(self):
1212        self.writer.close()
1213        self.reader.close()
1214
1215    def isatty(self):
1216        return self.reader.isatty() or self.writer.isatty()
1217
1218    @property
1219    def closed(self):
1220        return self.writer.closed
1221
1222
1223class BufferedRandom(BufferedWriter, BufferedReader):
1224
1225    """A buffered interface to random access streams.
1226
1227    The constructor creates a reader and writer for a seekable stream,
1228    raw, given in the first argument. If the buffer_size is omitted it
1229    defaults to DEFAULT_BUFFER_SIZE.
1230    """
1231
1232    _warning_stack_offset = 3
1233
1234    def __init__(self, raw,
1235                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1236        raw._checkSeekable()
1237        BufferedReader.__init__(self, raw, buffer_size)
1238        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1239
1240    def seek(self, pos, whence=0):
1241        if not (0 <= whence <= 2):
1242            raise ValueError("invalid whence")
1243        self.flush()
1244        if self._read_buf:
1245            # Undo read ahead.
1246            with self._read_lock:
1247                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1248        # First do the raw seek, then empty the read buffer, so that
1249        # if the raw seek fails, we don't lose buffered data forever.
1250        pos = self.raw.seek(pos, whence)
1251        with self._read_lock:
1252            self._reset_read_buf()
1253        if pos < 0:
1254            raise IOError("seek() returned invalid position")
1255        return pos
1256
1257    def tell(self):
1258        if self._write_buf:
1259            return BufferedWriter.tell(self)
1260        else:
1261            return BufferedReader.tell(self)
1262
1263    def truncate(self, pos=None):
1264        if pos is None:
1265            pos = self.tell()
1266        # Use seek to flush the read buffer.
1267        return BufferedWriter.truncate(self, pos)
1268
1269    def read(self, n=None):
1270        if n is None:
1271            n = -1
1272        self.flush()
1273        return BufferedReader.read(self, n)
1274
1275    def readinto(self, b):
1276        self.flush()
1277        return BufferedReader.readinto(self, b)
1278
1279    def peek(self, n=0):
1280        self.flush()
1281        return BufferedReader.peek(self, n)
1282
1283    def read1(self, n):
1284        self.flush()
1285        return BufferedReader.read1(self, n)
1286
1287    def write(self, b):
1288        if self._read_buf:
1289            # Undo readahead
1290            with self._read_lock:
1291                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1292                self._reset_read_buf()
1293        return BufferedWriter.write(self, b)
1294
1295
1296class TextIOBase(IOBase):
1297
1298    """Base class for text I/O.
1299
1300    This class provides a character and line based interface to stream
1301    I/O. There is no readinto method because Python's character strings
1302    are immutable. There is no public constructor.
1303    """
1304
1305    def read(self, n=-1):
1306        """Read at most n characters from stream.
1307
1308        Read from underlying buffer until we have n characters or we hit EOF.
1309        If n is negative or omitted, read until EOF.
1310        """
1311        self._unsupported("read")
1312
1313    def write(self, s):
1314        """Write string s to stream."""
1315        self._unsupported("write")
1316
1317    def truncate(self, pos=None):
1318        """Truncate size to pos."""
1319        self._unsupported("truncate")
1320
1321    def readline(self):
1322        """Read until newline or EOF.
1323
1324        Returns an empty string if EOF is hit immediately.
1325        """
1326        self._unsupported("readline")
1327
1328    def detach(self):
1329        """
1330        Separate the underlying buffer from the TextIOBase and return it.
1331
1332        After the underlying buffer has been detached, the TextIO is in an
1333        unusable state.
1334        """
1335        self._unsupported("detach")
1336
1337    @property
1338    def encoding(self):
1339        """Subclasses should override."""
1340        return None
1341
1342    @property
1343    def newlines(self):
1344        """Line endings translated so far.
1345
1346        Only line endings translated during reading are considered.
1347
1348        Subclasses should override.
1349        """
1350        return None
1351
1352    @property
1353    def errors(self):
1354        """Error setting of the decoder or encoder.
1355
1356        Subclasses should override."""
1357        return None
1358
1359io.TextIOBase.register(TextIOBase)
1360
1361
1362class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1363    r"""Codec used when reading a file in universal newlines mode.  It wraps
1364    another incremental decoder, translating \r\n and \r into \n.  It also
1365    records the types of newlines encountered.  When used with
1366    translate=False, it ensures that the newline sequence is returned in
1367    one piece.
1368    """
1369    def __init__(self, decoder, translate, errors='strict'):
1370        codecs.IncrementalDecoder.__init__(self, errors=errors)
1371        self.translate = translate
1372        self.decoder = decoder
1373        self.seennl = 0
1374        self.pendingcr = False
1375
1376    def decode(self, input, final=False):
1377        # decode input (with the eventual \r from a previous pass)
1378        if self.decoder is None:
1379            output = input
1380        else:
1381            output = self.decoder.decode(input, final=final)
1382        if self.pendingcr and (output or final):
1383            output = "\r" + output
1384            self.pendingcr = False
1385
1386        # retain last \r even when not translating data:
1387        # then readline() is sure to get \r\n in one pass
1388        if output.endswith("\r") and not final:
1389            output = output[:-1]
1390            self.pendingcr = True
1391
1392        # Record which newlines are read
1393        crlf = output.count('\r\n')
1394        cr = output.count('\r') - crlf
1395        lf = output.count('\n') - crlf
1396        self.seennl |= (lf and self._LF) | (cr and self._CR) \
1397                    | (crlf and self._CRLF)
1398
1399        if self.translate:
1400            if crlf:
1401                output = output.replace("\r\n", "\n")
1402            if cr:
1403                output = output.replace("\r", "\n")
1404
1405        return output
1406
1407    def getstate(self):
1408        if self.decoder is None:
1409            buf = b""
1410            flag = 0
1411        else:
1412            buf, flag = self.decoder.getstate()
1413        flag <<= 1
1414        if self.pendingcr:
1415            flag |= 1
1416        return buf, flag
1417
1418    def setstate(self, state):
1419        buf, flag = state
1420        self.pendingcr = bool(flag & 1)
1421        if self.decoder is not None:
1422            self.decoder.setstate((buf, flag >> 1))
1423
1424    def reset(self):
1425        self.seennl = 0
1426        self.pendingcr = False
1427        if self.decoder is not None:
1428            self.decoder.reset()
1429
1430    _LF = 1
1431    _CR = 2
1432    _CRLF = 4
1433
1434    @property
1435    def newlines(self):
1436        return (None,
1437                "\n",
1438                "\r",
1439                ("\r", "\n"),
1440                "\r\n",
1441                ("\n", "\r\n"),
1442                ("\r", "\r\n"),
1443                ("\r", "\n", "\r\n")
1444               )[self.seennl]
1445
1446
1447class TextIOWrapper(TextIOBase):
1448
1449    r"""Character and line based layer over a BufferedIOBase object, buffer.
1450
1451    encoding gives the name of the encoding that the stream will be
1452    decoded or encoded with. It defaults to locale.getpreferredencoding.
1453
1454    errors determines the strictness of encoding and decoding (see the
1455    codecs.register) and defaults to "strict".
1456
1457    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1458    handling of line endings. If it is None, universal newlines is
1459    enabled.  With this enabled, on input, the lines endings '\n', '\r',
1460    or '\r\n' are translated to '\n' before being returned to the
1461    caller. Conversely, on output, '\n' is translated to the system
1462    default line separator, os.linesep. If newline is any other of its
1463    legal values, that newline becomes the newline when the file is read
1464    and it is returned untranslated. On output, '\n' is converted to the
1465    newline.
1466
1467    If line_buffering is True, a call to flush is implied when a call to
1468    write contains a newline character.
1469    """
1470
1471    _CHUNK_SIZE = 2048
1472
1473    def __init__(self, buffer, encoding=None, errors=None, newline=None,
1474                 line_buffering=False):
1475        if newline is not None and not isinstance(newline, basestring):
1476            raise TypeError("illegal newline type: %r" % (type(newline),))
1477        if newline not in (None, "", "\n", "\r", "\r\n"):
1478            raise ValueError("illegal newline value: %r" % (newline,))
1479        if encoding is None:
1480            try:
1481                import locale
1482            except ImportError:
1483                # Importing locale may fail if Python is being built
1484                encoding = "ascii"
1485            else:
1486                encoding = locale.getpreferredencoding()
1487
1488        if not isinstance(encoding, basestring):
1489            raise ValueError("invalid encoding: %r" % encoding)
1490
1491        if errors is None:
1492            errors = "strict"
1493        else:
1494            if not isinstance(errors, basestring):
1495                raise ValueError("invalid errors: %r" % errors)
1496
1497        self._buffer = buffer
1498        self._line_buffering = line_buffering
1499        self._encoding = encoding
1500        self._errors = errors
1501        self._readuniversal = not newline
1502        self._readtranslate = newline is None
1503        self._readnl = newline
1504        self._writetranslate = newline != ''
1505        self._writenl = newline or os.linesep
1506        self._encoder = None
1507        self._decoder = None
1508        self._decoded_chars = ''  # buffer for text returned from decoder
1509        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1510        self._snapshot = None  # info for reconstructing decoder state
1511        self._seekable = self._telling = self.buffer.seekable()
1512
1513        if self._seekable and self.writable():
1514            position = self.buffer.tell()
1515            if position != 0:
1516                try:
1517                    self._get_encoder().setstate(0)
1518                except LookupError:
1519                    # Sometimes the encoder doesn't exist
1520                    pass
1521
1522    # self._snapshot is either None, or a tuple (dec_flags, next_input)
1523    # where dec_flags is the second (integer) item of the decoder state
1524    # and next_input is the chunk of input bytes that comes next after the
1525    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
        else:
            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
                name, self.encoding)

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    @property
    def buffer(self):
        return self._buffer

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        if self.buffer is not None and not self.closed:
            try:
                self.flush()
            finally:
                self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n
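
    # Illustrative example of the _decoded_chars ADT above: after
    # _set_decoded_chars(u"abcdef"), a call to _get_decoded_chars(2) returns
    # u"ab" and leaves _decoded_chars_used == 2; _rewind_decoded_chars(1)
    # then moves the offset back to 1, so the next read starts at u"b".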

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof
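
    # Illustrative example: _read_chunk() returns False only when read1()
    # returns no data (normally EOF); read() and readline() below loop on
    # this result to keep pulling and decoding data until they have enough
    # characters.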

    def _pack_cookie(self, position, dec_flags=0,
                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)
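
    # Illustrative example of the cookie arithmetic: packing position=10 with
    # chars_to_skip=3 and everything else zero yields 10 + (3 << 192);
    # feeding that value to _unpack_cookie() below returns (10, 0, 0, 0, 3).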

    def _unpack_cookie(self, bigint):
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)
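
    # Illustrative usage (assuming a seekable text stream f): the cookie
    # returned by tell() is opaque and only meaningful to seek() on the same
    # stream, e.g.
    #
    #   cookie = f.tell()
    #   f.read(5)
    #   f.seek(cookie)   # restores both the buffer position and decoder state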

    def truncate(self, pos=None):
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self._buffer
        self._buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

    def read(self, n=None):
        self._checkReadable()
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        try:
            n.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result
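
    # Illustrative example: read() with no argument (or a negative n) drains
    # the buffered decoded text plus everything left in the underlying binary
    # stream, while read(n) keeps calling _read_chunk() until n characters
    # are available, so it returns fewer than n characters only at EOF.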

    def next(self):
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]
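
    # Illustrative example: readline(limit) returns at most limit characters
    # even when more are buffered; reading u"abcdef\n" with readline(3)
    # yields u"abc" and leaves u"def\n" buffered for the next readline() call.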

    @property
    def newlines(self):
        return self._decoder.newlines if self._decoder else None


class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the initial value of the object.  The
    newline argument works like the one of TextIOWrapper's constructor.
    """
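
    # Illustrative usage (a minimal sketch, not part of the original module):
    #
    #   sio = StringIO(u"hello\nworld\n")
    #   sio.readline()    # -> u'hello\n'
    #   sio.getvalue()    # -> u'hello\nworld\n', regardless of the position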

    def __init__(self, initial_value="", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value:
            if not isinstance(initial_value, unicode):
                initial_value = unicode(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        return None

    @property
    def encoding(self):
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")
