_pyio.py revision 53ae6145a0b8f3380f819bf1c60b5dfc676f05ec
1"""
2Python implementation of the io module.
3"""
4
5from __future__ import (print_function, unicode_literals)
6
7import os
8import abc
9import codecs
10import warnings
11import errno
12# Import thread instead of threading to reduce startup cost
13try:
14    from thread import allocate_lock as Lock
15except ImportError:
16    from dummy_thread import allocate_lock as Lock
17
18import io
19from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
20from errno import EINTR
21
22__metaclass__ = type
23
24# open() uses st_blksize whenever we can
25DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
26
27# NOTE: Base classes defined here are registered with the "official" ABCs
28# defined in io.py. We don't use real inheritance though, because we don't
29# want to inherit the C implementations.
30
31
32class BlockingIOError(IOError):
33
34    """Exception raised when I/O would block on a non-blocking I/O stream."""
35
36    def __init__(self, errno, strerror, characters_written=0):
37        super(IOError, self).__init__(errno, strerror)
38        if not isinstance(characters_written, (int, long)):
39            raise TypeError("characters_written must be a integer")
40        self.characters_written = characters_written
41
42
43def open(file, mode="r", buffering=-1,
44         encoding=None, errors=None,
45         newline=None, closefd=True):
46
47    r"""Open file and return a stream.  Raise IOError upon failure.
48
49    file is either a text or byte string giving the name (and the path
50    if the file isn't in the current working directory) of the file to
51    be opened or an integer file descriptor of the file to be
52    wrapped. (If a file descriptor is given, it is closed when the
53    returned I/O object is closed, unless closefd is set to False.)
54
55    mode is an optional string that specifies the mode in which the file
56    is opened. It defaults to 'r' which means open for reading in text
57    mode.  Other common values are 'w' for writing (truncating the file if
58    it already exists), and 'a' for appending (which on some Unix systems,
59    means that all writes append to the end of the file regardless of the
60    current seek position). In text mode, if encoding is not specified the
61    encoding used is platform dependent. (For reading and writing raw
62    bytes use binary mode and leave encoding unspecified.) The available
63    modes are:
64
65    ========= ===============================================================
66    Character Meaning
67    --------- ---------------------------------------------------------------
68    'r'       open for reading (default)
69    'w'       open for writing, truncating the file first
70    'a'       open for writing, appending to the end of the file if it exists
71    'b'       binary mode
72    't'       text mode (default)
73    '+'       open a disk file for updating (reading and writing)
74    'U'       universal newline mode (for backwards compatibility; unneeded
75              for new code)
76    ========= ===============================================================
77
78    The default mode is 'rt' (open for reading text). For binary random
79    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
80    'r+b' opens the file without truncation.
81
82    Python distinguishes between files opened in binary and text modes,
83    even when the underlying operating system doesn't. Files opened in
84    binary mode (appending 'b' to the mode argument) return contents as
85    bytes objects without any decoding. In text mode (the default, or when
86    't' is appended to the mode argument), the contents of the file are
87    returned as strings, the bytes having been first decoded using a
88    platform-dependent encoding or using the specified encoding if given.
89
90    buffering is an optional integer used to set the buffering policy.
91    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
92    line buffering (only usable in text mode), and an integer > 1 to indicate
93    the size of a fixed-size chunk buffer.  When no buffering argument is
94    given, the default buffering policy works as follows:
95
96    * Binary files are buffered in fixed-size chunks; the size of the buffer
97      is chosen using a heuristic trying to determine the underlying device's
98      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
99      On many systems, the buffer will typically be 4096 or 8192 bytes long.
100
101    * "Interactive" text files (files for which isatty() returns True)
102      use line buffering.  Other text files use the policy described above
103      for binary files.
104
105    encoding is the name of the encoding used to decode or encode the
106    file. This should only be used in text mode. The default encoding is
107    platform dependent, but any encoding supported by Python can be
108    passed.  See the codecs module for the list of supported encodings.
109
110    errors is an optional string that specifies how encoding errors are to
111    be handled---this argument should not be used in binary mode. Pass
112    'strict' to raise a ValueError exception if there is an encoding error
113    (the default of None has the same effect), or pass 'ignore' to ignore
114    errors. (Note that ignoring encoding errors can lead to data loss.)
115    See the documentation for codecs.register for a list of the permitted
116    encoding error strings.
117
118    newline controls how universal newlines works (it only applies to text
119    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
120    follows:
121
122    * On input, if newline is None, universal newlines mode is
123      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
124      these are translated into '\n' before being returned to the
125      caller. If it is '', universal newline mode is enabled, but line
126      endings are returned to the caller untranslated. If it has any of
127      the other legal values, input lines are only terminated by the given
128      string, and the line ending is returned to the caller untranslated.
129
130    * On output, if newline is None, any '\n' characters written are
131      translated to the system default line separator, os.linesep. If
132      newline is '', no translation takes place. If newline is any of the
133      other legal values, any '\n' characters written are translated to
134      the given string.
135
136    If closefd is False, the underlying file descriptor will be kept open
137    when the file is closed. This does not work when a file name is given
138    and must be True in that case.
139
140    open() returns a file object whose type depends on the mode, and
141    through which the standard file operations such as reading and writing
142    are performed. When open() is used to open a file in a text mode ('w',
143    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
144    a file in a binary mode, the returned class varies: in read binary
145    mode, it returns a BufferedReader; in write binary and append binary
146    modes, it returns a BufferedWriter, and in read/write mode, it returns
147    a BufferedRandom.
148
149    It is also possible to use a string or bytearray as a file for both
150    reading and writing. For strings StringIO can be used like a file
151    opened in a text mode, and for bytes a BytesIO can be used like a file
152    opened in a binary mode.
153    """
154    if not isinstance(file, (basestring, int, long)):
155        raise TypeError("invalid file: %r" % file)
156    if not isinstance(mode, basestring):
157        raise TypeError("invalid mode: %r" % mode)
158    if not isinstance(buffering, (int, long)):
159        raise TypeError("invalid buffering: %r" % buffering)
160    if encoding is not None and not isinstance(encoding, basestring):
161        raise TypeError("invalid encoding: %r" % encoding)
162    if errors is not None and not isinstance(errors, basestring):
163        raise TypeError("invalid errors: %r" % errors)
164    modes = set(mode)
165    if modes - set("arwb+tU") or len(mode) > len(modes):
166        raise ValueError("invalid mode: %r" % mode)
167    reading = "r" in modes
168    writing = "w" in modes
169    appending = "a" in modes
170    updating = "+" in modes
171    text = "t" in modes
172    binary = "b" in modes
173    if "U" in modes:
174        if writing or appending:
175            raise ValueError("can't use U and writing mode at once")
176        reading = True
177    if text and binary:
178        raise ValueError("can't have text and binary mode at once")
179    if reading + writing + appending > 1:
180        raise ValueError("can't have read/write/append mode at once")
181    if not (reading or writing or appending):
182        raise ValueError("must have exactly one of read/write/append mode")
183    if binary and encoding is not None:
184        raise ValueError("binary mode doesn't take an encoding argument")
185    if binary and errors is not None:
186        raise ValueError("binary mode doesn't take an errors argument")
187    if binary and newline is not None:
188        raise ValueError("binary mode doesn't take a newline argument")
189    raw = FileIO(file,
190                 (reading and "r" or "") +
191                 (writing and "w" or "") +
192                 (appending and "a" or "") +
193                 (updating and "+" or ""),
194                 closefd)
195    result = raw
196    try:
197        line_buffering = False
198        if buffering == 1 or buffering < 0 and raw.isatty():
199            buffering = -1
200            line_buffering = True
201        if buffering < 0:
202            buffering = DEFAULT_BUFFER_SIZE
203            try:
204                bs = os.fstat(raw.fileno()).st_blksize
205            except (os.error, AttributeError):
206                pass
207            else:
208                if bs > 1:
209                    buffering = bs
210        if buffering < 0:
211            raise ValueError("invalid buffering size")
212        if buffering == 0:
213            if binary:
214                return result
215            raise ValueError("can't have unbuffered text I/O")
216        if updating:
217            buffer = BufferedRandom(raw, buffering)
218        elif writing or appending:
219            buffer = BufferedWriter(raw, buffering)
220        elif reading:
221            buffer = BufferedReader(raw, buffering)
222        else:
223            raise ValueError("unknown mode: %r" % mode)
224        result = buffer
225        if binary:
226            return result
227        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
228        result = text
229        text.mode = mode
230        return result
231    except:
232        result.close()
233        raise
234
235
236class DocDescriptor:
237    """Helper for builtins.open.__doc__
238    """
239    def __get__(self, obj, typ):
240        return (
241            "open(file, mode='r', buffering=-1, encoding=None, "
242                 "errors=None, newline=None, closefd=True)\n\n" +
243            open.__doc__)
244
245class OpenWrapper:
246    """Wrapper for builtins.open
247
248    Trick so that open won't become a bound method when stored
249    as a class variable (as dbm.dumb does).
250
251    See initstdio() in Python/pythonrun.c.
252    """
253    __doc__ = DocDescriptor()
254
255    def __new__(cls, *args, **kwargs):
256        return open(*args, **kwargs)
257
258
259class UnsupportedOperation(ValueError, IOError):
260    pass
261
262
263class IOBase:
264    __metaclass__ = abc.ABCMeta
265
266    """The abstract base class for all I/O classes, acting on streams of
267    bytes. There is no public constructor.
268
269    This class provides dummy implementations for many methods that
270    derived classes can override selectively; the default implementations
271    represent a file that cannot be read, written or seeked.
272
273    Even though IOBase does not declare read, readinto, or write because
274    their signatures will vary, implementations and clients should
275    consider those methods part of the interface. Also, implementations
276    may raise a IOError when operations they do not support are called.
277
278    The basic type used for binary data read from or written to a file is
279    bytes. bytearrays are accepted too, and in some cases (such as
280    readinto) needed. Text I/O classes work with str data.
281
282    Note that calling any method (even inquiries) on a closed stream is
283    undefined. Implementations may raise IOError in this case.
284
285    IOBase (and its subclasses) support the iterator protocol, meaning
286    that an IOBase object can be iterated over yielding the lines in a
287    stream.
288
289    IOBase also supports the :keyword:`with` statement. In this example,
290    fp is closed after the suite of the with statement is complete:
291
292    with open('spam.txt', 'r') as fp:
293        fp.write('Spam and eggs!')
294    """
295
296    ### Internal ###
297
298    def _unsupported(self, name):
299        """Internal: raise an exception for unsupported operations."""
300        raise UnsupportedOperation("%s.%s() not supported" %
301                                   (self.__class__.__name__, name))
302
303    ### Positioning ###
304
305    def seek(self, pos, whence=0):
306        """Change stream position.
307
308        Change the stream position to byte offset pos. Argument pos is
309        interpreted relative to the position indicated by whence.  Values
310        for whence are:
311
312        * 0 -- start of stream (the default); offset should be zero or positive
313        * 1 -- current stream position; offset may be negative
314        * 2 -- end of stream; offset is usually negative
315
316        Return the new absolute position.
317        """
318        self._unsupported("seek")
319
320    def tell(self):
321        """Return current stream position."""
322        return self.seek(0, 1)
323
324    def truncate(self, pos=None):
325        """Truncate file to size bytes.
326
327        Size defaults to the current IO position as reported by tell().  Return
328        the new size.
329        """
330        self._unsupported("truncate")
331
332    ### Flush and close ###
333
334    def flush(self):
335        """Flush write buffers, if applicable.
336
337        This is not implemented for read-only and non-blocking streams.
338        """
339        self._checkClosed()
340        # XXX Should this return the number of bytes written???
341
342    __closed = False
343
344    def close(self):
345        """Flush and close the IO object.
346
347        This method has no effect if the file is already closed.
348        """
349        if not self.__closed:
350            try:
351                self.flush()
352            finally:
353                self.__closed = True
354
355    def __del__(self):
356        """Destructor.  Calls close()."""
357        # The try/except block is in case this is called at program
358        # exit time, when it's possible that globals have already been
359        # deleted, and then the close() call might fail.  Since
360        # there's nothing we can do about such failures and they annoy
361        # the end users, we suppress the traceback.
362        try:
363            self.close()
364        except:
365            pass
366
367    ### Inquiries ###
368
369    def seekable(self):
370        """Return whether object supports random access.
371
372        If False, seek(), tell() and truncate() will raise IOError.
373        This method may need to do a test seek().
374        """
375        return False
376
377    def _checkSeekable(self, msg=None):
378        """Internal: raise an IOError if file is not seekable
379        """
380        if not self.seekable():
381            raise IOError("File or stream is not seekable."
382                          if msg is None else msg)
383
384
385    def readable(self):
386        """Return whether object was opened for reading.
387
388        If False, read() will raise IOError.
389        """
390        return False
391
392    def _checkReadable(self, msg=None):
393        """Internal: raise an IOError if file is not readable
394        """
395        if not self.readable():
396            raise IOError("File or stream is not readable."
397                          if msg is None else msg)
398
399    def writable(self):
400        """Return whether object was opened for writing.
401
402        If False, write() and truncate() will raise IOError.
403        """
404        return False
405
406    def _checkWritable(self, msg=None):
407        """Internal: raise an IOError if file is not writable
408        """
409        if not self.writable():
410            raise IOError("File or stream is not writable."
411                          if msg is None else msg)
412
413    @property
414    def closed(self):
415        """closed: bool.  True iff the file has been closed.
416
417        For backwards compatibility, this is a property, not a predicate.
418        """
419        return self.__closed
420
421    def _checkClosed(self, msg=None):
422        """Internal: raise an ValueError if file is closed
423        """
424        if self.closed:
425            raise ValueError("I/O operation on closed file."
426                             if msg is None else msg)
427
428    ### Context manager ###
429
430    def __enter__(self):
431        """Context management protocol.  Returns self."""
432        self._checkClosed()
433        return self
434
435    def __exit__(self, *args):
436        """Context management protocol.  Calls close()"""
437        self.close()
438
439    ### Lower-level APIs ###
440
441    # XXX Should these be present even if unimplemented?
442
443    def fileno(self):
444        """Returns underlying file descriptor if one exists.
445
446        An IOError is raised if the IO object does not use a file descriptor.
447        """
448        self._unsupported("fileno")
449
450    def isatty(self):
451        """Return whether this is an 'interactive' stream.
452
453        Return False if it can't be determined.
454        """
455        self._checkClosed()
456        return False
457
458    ### Readline[s] and writelines ###
459
460    def readline(self, limit=-1):
461        r"""Read and return a line from the stream.
462
463        If limit is specified, at most limit bytes will be read.
464
465        The line terminator is always b'\n' for binary files; for text
466        files, the newlines argument to open can be used to select the line
467        terminator(s) recognized.
468        """
469        # For backwards compatibility, a (slowish) readline().
470        if hasattr(self, "peek"):
471            def nreadahead():
472                readahead = self.peek(1)
473                if not readahead:
474                    return 1
475                n = (readahead.find(b"\n") + 1) or len(readahead)
476                if limit >= 0:
477                    n = min(n, limit)
478                return n
479        else:
480            def nreadahead():
481                return 1
482        if limit is None:
483            limit = -1
484        elif not isinstance(limit, (int, long)):
485            raise TypeError("limit must be an integer")
486        res = bytearray()
487        while limit < 0 or len(res) < limit:
488            b = self.read(nreadahead())
489            if not b:
490                break
491            res += b
492            if res.endswith(b"\n"):
493                break
494        return bytes(res)
495
496    def __iter__(self):
497        self._checkClosed()
498        return self
499
500    def next(self):
501        line = self.readline()
502        if not line:
503            raise StopIteration
504        return line
505
506    def readlines(self, hint=None):
507        """Return a list of lines from the stream.
508
509        hint can be specified to control the number of lines read: no more
510        lines will be read if the total size (in bytes/characters) of all
511        lines so far exceeds hint.
512        """
513        if hint is not None and not isinstance(hint, (int, long)):
514            raise TypeError("integer or None expected")
515        if hint is None or hint <= 0:
516            return list(self)
517        n = 0
518        lines = []
519        for line in self:
520            lines.append(line)
521            n += len(line)
522            if n >= hint:
523                break
524        return lines
525
526    def writelines(self, lines):
527        self._checkClosed()
528        for line in lines:
529            self.write(line)
530
531io.IOBase.register(IOBase)
532
533
534class RawIOBase(IOBase):
535
536    """Base class for raw binary I/O."""
537
538    # The read() method is implemented by calling readinto(); derived
539    # classes that want to support read() only need to implement
540    # readinto() as a primitive operation.  In general, readinto() can be
541    # more efficient than read().
542
543    # (It would be tempting to also provide an implementation of
544    # readinto() in terms of read(), in case the latter is a more suitable
545    # primitive operation, but that would lead to nasty recursion in case
546    # a subclass doesn't implement either.)
547
548    def read(self, n=-1):
549        """Read and return up to n bytes.
550
551        Returns an empty bytes object on EOF, or None if the object is
552        set not to block and has no data to read.
553        """
554        if n is None:
555            n = -1
556        if n < 0:
557            return self.readall()
558        b = bytearray(n.__index__())
559        n = self.readinto(b)
560        if n is None:
561            return None
562        del b[n:]
563        return bytes(b)
564
565    def readall(self):
566        """Read until EOF, using multiple read() call."""
567        res = bytearray()
568        while True:
569            data = self.read(DEFAULT_BUFFER_SIZE)
570            if not data:
571                break
572            res += data
573        if res:
574            return bytes(res)
575        else:
576            # b'' or None
577            return data
578
579    def readinto(self, b):
580        """Read up to len(b) bytes into b.
581
582        Returns number of bytes read (0 for EOF), or None if the object
583        is set not to block and has no data to read.
584        """
585        self._unsupported("readinto")
586
587    def write(self, b):
588        """Write the given buffer to the IO stream.
589
590        Returns the number of bytes written, which may be less than len(b).
591        """
592        self._unsupported("write")
593
594io.RawIOBase.register(RawIOBase)
595from _io import FileIO
596RawIOBase.register(FileIO)
597
598
599class BufferedIOBase(IOBase):
600
601    """Base class for buffered IO objects.
602
603    The main difference with RawIOBase is that the read() method
604    supports omitting the size argument, and does not have a default
605    implementation that defers to readinto().
606
607    In addition, read(), readinto() and write() may raise
608    BlockingIOError if the underlying raw stream is in non-blocking
609    mode and not ready; unlike their raw counterparts, they will never
610    return None.
611
612    A typical implementation should not inherit from a RawIOBase
613    implementation, but wrap one.
614    """
615
616    def read(self, n=None):
617        """Read and return up to n bytes.
618
619        If the argument is omitted, None, or negative, reads and
620        returns all data until EOF.
621
622        If the argument is positive, and the underlying raw stream is
623        not 'interactive', multiple raw reads may be issued to satisfy
624        the byte count (unless EOF is reached first).  But for
625        interactive raw streams (XXX and for pipes?), at most one raw
626        read will be issued, and a short result does not imply that
627        EOF is imminent.
628
629        Returns an empty bytes array on EOF.
630
631        Raises BlockingIOError if the underlying raw stream has no
632        data at the moment.
633        """
634        self._unsupported("read")
635
636    def read1(self, n=None):
637        """Read up to n bytes with at most one read() system call."""
638        self._unsupported("read1")
639
640    def readinto(self, b):
641        """Read up to len(b) bytes into b.
642
643        Like read(), this may issue multiple reads to the underlying raw
644        stream, unless the latter is 'interactive'.
645
646        Returns the number of bytes read (0 for EOF).
647
648        Raises BlockingIOError if the underlying raw stream has no
649        data at the moment.
650        """
651        # XXX This ought to work with anything that supports the buffer API
652        data = self.read(len(b))
653        n = len(data)
654        try:
655            b[:n] = data
656        except TypeError as err:
657            import array
658            if not isinstance(b, array.array):
659                raise err
660            b[:n] = array.array(b'b', data)
661        return n
662
663    def write(self, b):
664        """Write the given buffer to the IO stream.
665
666        Return the number of bytes written, which is never less than
667        len(b).
668
669        Raises BlockingIOError if the buffer is full and the
670        underlying raw stream cannot accept more data at the moment.
671        """
672        self._unsupported("write")
673
674    def detach(self):
675        """
676        Separate the underlying raw stream from the buffer and return it.
677
678        After the raw stream has been detached, the buffer is in an unusable
679        state.
680        """
681        self._unsupported("detach")
682
683io.BufferedIOBase.register(BufferedIOBase)
684
685
686class _BufferedIOMixin(BufferedIOBase):
687
688    """A mixin implementation of BufferedIOBase with an underlying raw stream.
689
690    This passes most requests on to the underlying raw stream.  It
691    does *not* provide implementations of read(), readinto() or
692    write().
693    """
694
695    def __init__(self, raw):
696        self._raw = raw
697
698    ### Positioning ###
699
700    def seek(self, pos, whence=0):
701        new_position = self.raw.seek(pos, whence)
702        if new_position < 0:
703            raise IOError("seek() returned an invalid position")
704        return new_position
705
706    def tell(self):
707        pos = self.raw.tell()
708        if pos < 0:
709            raise IOError("tell() returned an invalid position")
710        return pos
711
712    def truncate(self, pos=None):
713        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
714        # and a flush may be necessary to synch both views of the current
715        # file state.
716        self.flush()
717
718        if pos is None:
719            pos = self.tell()
720        # XXX: Should seek() be used, instead of passing the position
721        # XXX  directly to truncate?
722        return self.raw.truncate(pos)
723
724    ### Flush and close ###
725
726    def flush(self):
727        if self.closed:
728            raise ValueError("flush of closed file")
729        self.raw.flush()
730
731    def close(self):
732        if self.raw is not None and not self.closed:
733            try:
734                # may raise BlockingIOError or BrokenPipeError etc
735                self.flush()
736            finally:
737                self.raw.close()
738
739    def detach(self):
740        if self.raw is None:
741            raise ValueError("raw stream already detached")
742        self.flush()
743        raw = self._raw
744        self._raw = None
745        return raw
746
747    ### Inquiries ###
748
749    def seekable(self):
750        return self.raw.seekable()
751
752    def readable(self):
753        return self.raw.readable()
754
755    def writable(self):
756        return self.raw.writable()
757
758    @property
759    def raw(self):
760        return self._raw
761
762    @property
763    def closed(self):
764        return self.raw.closed
765
766    @property
767    def name(self):
768        return self.raw.name
769
770    @property
771    def mode(self):
772        return self.raw.mode
773
774    def __repr__(self):
775        clsname = self.__class__.__name__
776        try:
777            name = self.name
778        except Exception:
779            return "<_pyio.{0}>".format(clsname)
780        else:
781            return "<_pyio.{0} name={1!r}>".format(clsname, name)
782
783    ### Lower-level APIs ###
784
785    def fileno(self):
786        return self.raw.fileno()
787
788    def isatty(self):
789        return self.raw.isatty()
790
791
792class BytesIO(BufferedIOBase):
793
794    """Buffered I/O implementation using an in-memory bytes buffer."""
795
796    def __init__(self, initial_bytes=None):
797        buf = bytearray()
798        if initial_bytes is not None:
799            buf.extend(initial_bytes)
800        self._buffer = buf
801        self._pos = 0
802
803    def __getstate__(self):
804        if self.closed:
805            raise ValueError("__getstate__ on closed file")
806        return self.__dict__.copy()
807
808    def getvalue(self):
809        """Return the bytes value (contents) of the buffer
810        """
811        if self.closed:
812            raise ValueError("getvalue on closed file")
813        return bytes(self._buffer)
814
815    def read(self, n=None):
816        if self.closed:
817            raise ValueError("read from closed file")
818        if n is None:
819            n = -1
820        if not isinstance(n, (int, long)):
821            raise TypeError("integer argument expected, got {0!r}".format(
822                type(n)))
823        if n < 0:
824            n = len(self._buffer)
825        if len(self._buffer) <= self._pos:
826            return b""
827        newpos = min(len(self._buffer), self._pos + n)
828        b = self._buffer[self._pos : newpos]
829        self._pos = newpos
830        return bytes(b)
831
832    def read1(self, n):
833        """This is the same as read.
834        """
835        return self.read(n)
836
837    def write(self, b):
838        if self.closed:
839            raise ValueError("write to closed file")
840        if isinstance(b, unicode):
841            raise TypeError("can't write unicode to binary stream")
842        n = len(b)
843        if n == 0:
844            return 0
845        pos = self._pos
846        if pos > len(self._buffer):
847            # Inserts null bytes between the current end of the file
848            # and the new write position.
849            padding = b'\x00' * (pos - len(self._buffer))
850            self._buffer += padding
851        self._buffer[pos:pos + n] = b
852        self._pos += n
853        return n
854
855    def seek(self, pos, whence=0):
856        if self.closed:
857            raise ValueError("seek on closed file")
858        try:
859            pos.__index__
860        except AttributeError:
861            raise TypeError("an integer is required")
862        if whence == 0:
863            if pos < 0:
864                raise ValueError("negative seek position %r" % (pos,))
865            self._pos = pos
866        elif whence == 1:
867            self._pos = max(0, self._pos + pos)
868        elif whence == 2:
869            self._pos = max(0, len(self._buffer) + pos)
870        else:
871            raise ValueError("invalid whence value")
872        return self._pos
873
874    def tell(self):
875        if self.closed:
876            raise ValueError("tell on closed file")
877        return self._pos
878
879    def truncate(self, pos=None):
880        if self.closed:
881            raise ValueError("truncate on closed file")
882        if pos is None:
883            pos = self._pos
884        else:
885            try:
886                pos.__index__
887            except AttributeError:
888                raise TypeError("an integer is required")
889            if pos < 0:
890                raise ValueError("negative truncate position %r" % (pos,))
891        del self._buffer[pos:]
892        return pos
893
894    def readable(self):
895        if self.closed:
896            raise ValueError("I/O operation on closed file.")
897        return True
898
899    def writable(self):
900        if self.closed:
901            raise ValueError("I/O operation on closed file.")
902        return True
903
904    def seekable(self):
905        if self.closed:
906            raise ValueError("I/O operation on closed file.")
907        return True
908
909
910class BufferedReader(_BufferedIOMixin):
911
912    """BufferedReader(raw[, buffer_size])
913
914    A buffer for a readable, sequential BaseRawIO object.
915
916    The constructor creates a BufferedReader for the given readable raw
917    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
918    is used.
919    """
920
921    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
922        """Create a new buffered reader using the given readable raw IO object.
923        """
924        if not raw.readable():
925            raise IOError('"raw" argument must be readable.')
926
927        _BufferedIOMixin.__init__(self, raw)
928        if buffer_size <= 0:
929            raise ValueError("invalid buffer size")
930        self.buffer_size = buffer_size
931        self._reset_read_buf()
932        self._read_lock = Lock()
933
934    def _reset_read_buf(self):
935        self._read_buf = b""
936        self._read_pos = 0
937
938    def read(self, n=None):
939        """Read n bytes.
940
941        Returns exactly n bytes of data unless the underlying raw IO
942        stream reaches EOF or if the call would block in non-blocking
943        mode. If n is negative, read until EOF or until read() would
944        block.
945        """
946        if n is not None and n < -1:
947            raise ValueError("invalid number of bytes to read")
948        with self._read_lock:
949            return self._read_unlocked(n)
950
951    def _read_unlocked(self, n=None):
952        nodata_val = b""
953        empty_values = (b"", None)
954        buf = self._read_buf
955        pos = self._read_pos
956
957        # Special case for when the number of bytes to read is unspecified.
958        if n is None or n == -1:
959            self._reset_read_buf()
960            chunks = [buf[pos:]]  # Strip the consumed bytes.
961            current_size = 0
962            while True:
963                # Read until EOF or until read() would block.
964                try:
965                    chunk = self.raw.read()
966                except IOError as e:
967                    if e.errno != EINTR:
968                        raise
969                    continue
970                if chunk in empty_values:
971                    nodata_val = chunk
972                    break
973                current_size += len(chunk)
974                chunks.append(chunk)
975            return b"".join(chunks) or nodata_val
976
977        # The number of bytes to read is specified, return at most n bytes.
978        avail = len(buf) - pos  # Length of the available buffered data.
979        if n <= avail:
980            # Fast path: the data to read is fully buffered.
981            self._read_pos += n
982            return buf[pos:pos+n]
983        # Slow path: read from the stream until enough bytes are read,
984        # or until an EOF occurs or until read() would block.
985        chunks = [buf[pos:]]
986        wanted = max(self.buffer_size, n)
987        while avail < n:
988            try:
989                chunk = self.raw.read(wanted)
990            except IOError as e:
991                if e.errno != EINTR:
992                    raise
993                continue
994            if chunk in empty_values:
995                nodata_val = chunk
996                break
997            avail += len(chunk)
998            chunks.append(chunk)
999        # n is more then avail only when an EOF occurred or when
1000        # read() would have blocked.
1001        n = min(n, avail)
1002        out = b"".join(chunks)
1003        self._read_buf = out[n:]  # Save the extra data in the buffer.
1004        self._read_pos = 0
1005        return out[:n] if out else nodata_val
1006
1007    def peek(self, n=0):
1008        """Returns buffered bytes without advancing the position.
1009
1010        The argument indicates a desired minimal number of bytes; we
1011        do at most one raw read to satisfy it.  We never return more
1012        than self.buffer_size.
1013        """
1014        with self._read_lock:
1015            return self._peek_unlocked(n)
1016
1017    def _peek_unlocked(self, n=0):
1018        want = min(n, self.buffer_size)
1019        have = len(self._read_buf) - self._read_pos
1020        if have < want or have <= 0:
1021            to_read = self.buffer_size - have
1022            while True:
1023                try:
1024                    current = self.raw.read(to_read)
1025                except IOError as e:
1026                    if e.errno != EINTR:
1027                        raise
1028                    continue
1029                break
1030            if current:
1031                self._read_buf = self._read_buf[self._read_pos:] + current
1032                self._read_pos = 0
1033        return self._read_buf[self._read_pos:]
1034
1035    def read1(self, n):
1036        """Reads up to n bytes, with at most one read() system call."""
1037        # Returns up to n bytes.  If at least one byte is buffered, we
1038        # only return buffered bytes.  Otherwise, we do one raw read.
1039        if n < 0:
1040            raise ValueError("number of bytes to read must be positive")
1041        if n == 0:
1042            return b""
1043        with self._read_lock:
1044            self._peek_unlocked(1)
1045            return self._read_unlocked(
1046                min(n, len(self._read_buf) - self._read_pos))
1047
1048    def tell(self):
1049        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1050
1051    def seek(self, pos, whence=0):
1052        if not (0 <= whence <= 2):
1053            raise ValueError("invalid whence value")
1054        with self._read_lock:
1055            if whence == 1:
1056                pos -= len(self._read_buf) - self._read_pos
1057            pos = _BufferedIOMixin.seek(self, pos, whence)
1058            self._reset_read_buf()
1059            return pos
1060
1061class BufferedWriter(_BufferedIOMixin):
1062
1063    """A buffer for a writeable sequential RawIO object.
1064
1065    The constructor creates a BufferedWriter for the given writeable raw
1066    stream. If the buffer_size is not given, it defaults to
1067    DEFAULT_BUFFER_SIZE.
1068    """
1069
1070    _warning_stack_offset = 2
1071
1072    def __init__(self, raw,
1073                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1074        if not raw.writable():
1075            raise IOError('"raw" argument must be writable.')
1076
1077        _BufferedIOMixin.__init__(self, raw)
1078        if buffer_size <= 0:
1079            raise ValueError("invalid buffer size")
1080        if max_buffer_size is not None:
1081            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
1082                          self._warning_stack_offset)
1083        self.buffer_size = buffer_size
1084        self._write_buf = bytearray()
1085        self._write_lock = Lock()
1086
1087    def write(self, b):
1088        if self.closed:
1089            raise ValueError("write to closed file")
1090        if isinstance(b, unicode):
1091            raise TypeError("can't write unicode to binary stream")
1092        with self._write_lock:
1093            # XXX we can implement some more tricks to try and avoid
1094            # partial writes
1095            if len(self._write_buf) > self.buffer_size:
1096                # We're full, so let's pre-flush the buffer.  (This may
1097                # raise BlockingIOError with characters_written == 0.)
1098                self._flush_unlocked()
1099            before = len(self._write_buf)
1100            self._write_buf.extend(b)
1101            written = len(self._write_buf) - before
1102            if len(self._write_buf) > self.buffer_size:
1103                try:
1104                    self._flush_unlocked()
1105                except BlockingIOError as e:
1106                    if len(self._write_buf) > self.buffer_size:
1107                        # We've hit the buffer_size. We have to accept a partial
1108                        # write and cut back our buffer.
1109                        overage = len(self._write_buf) - self.buffer_size
1110                        written -= overage
1111                        self._write_buf = self._write_buf[:self.buffer_size]
1112                        raise BlockingIOError(e.errno, e.strerror, written)
1113            return written
1114
1115    def truncate(self, pos=None):
1116        with self._write_lock:
1117            self._flush_unlocked()
1118            if pos is None:
1119                pos = self.raw.tell()
1120            return self.raw.truncate(pos)
1121
1122    def flush(self):
1123        with self._write_lock:
1124            self._flush_unlocked()
1125
1126    def _flush_unlocked(self):
1127        if self.closed:
1128            raise ValueError("flush of closed file")
1129        while self._write_buf:
1130            try:
1131                n = self.raw.write(self._write_buf)
1132            except BlockingIOError:
1133                raise RuntimeError("self.raw should implement RawIOBase: it "
1134                                   "should not raise BlockingIOError")
1135            except IOError as e:
1136                if e.errno != EINTR:
1137                    raise
1138                continue
1139            if n is None:
1140                raise BlockingIOError(
1141                    errno.EAGAIN,
1142                    "write could not complete without blocking", 0)
1143            if n > len(self._write_buf) or n < 0:
1144                raise IOError("write() returned incorrect number of bytes")
1145            del self._write_buf[:n]
1146
1147    def tell(self):
1148        return _BufferedIOMixin.tell(self) + len(self._write_buf)
1149
1150    def seek(self, pos, whence=0):
1151        if not (0 <= whence <= 2):
1152            raise ValueError("invalid whence")
1153        with self._write_lock:
1154            self._flush_unlocked()
1155            return _BufferedIOMixin.seek(self, pos, whence)
1156
1157
1158class BufferedRWPair(BufferedIOBase):
1159
1160    """A buffered reader and writer object together.
1161
1162    A buffered reader object and buffered writer object put together to
1163    form a sequential IO object that can read and write. This is typically
1164    used with a socket or two-way pipe.
1165
1166    reader and writer are RawIOBase objects that are readable and
1167    writeable respectively. If the buffer_size is omitted it defaults to
1168    DEFAULT_BUFFER_SIZE.
1169    """
1170
1171    # XXX The usefulness of this (compared to having two separate IO
1172    # objects) is questionable.
1173
1174    def __init__(self, reader, writer,
1175                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1176        """Constructor.
1177
1178        The arguments are two RawIO instances.
1179        """
1180        if max_buffer_size is not None:
1181            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
1182
1183        if not reader.readable():
1184            raise IOError('"reader" argument must be readable.')
1185
1186        if not writer.writable():
1187            raise IOError('"writer" argument must be writable.')
1188
1189        self.reader = BufferedReader(reader, buffer_size)
1190        self.writer = BufferedWriter(writer, buffer_size)
1191
1192    def read(self, n=None):
1193        if n is None:
1194            n = -1
1195        return self.reader.read(n)
1196
1197    def readinto(self, b):
1198        return self.reader.readinto(b)
1199
1200    def write(self, b):
1201        return self.writer.write(b)
1202
1203    def peek(self, n=0):
1204        return self.reader.peek(n)
1205
1206    def read1(self, n):
1207        return self.reader.read1(n)
1208
1209    def readable(self):
1210        return self.reader.readable()
1211
1212    def writable(self):
1213        return self.writer.writable()
1214
1215    def flush(self):
1216        return self.writer.flush()
1217
1218    def close(self):
1219        self.writer.close()
1220        self.reader.close()
1221
1222    def isatty(self):
1223        return self.reader.isatty() or self.writer.isatty()
1224
1225    @property
1226    def closed(self):
1227        return self.writer.closed
1228
1229
1230class BufferedRandom(BufferedWriter, BufferedReader):
1231
1232    """A buffered interface to random access streams.
1233
1234    The constructor creates a reader and writer for a seekable stream,
1235    raw, given in the first argument. If the buffer_size is omitted it
1236    defaults to DEFAULT_BUFFER_SIZE.
1237    """
1238
1239    _warning_stack_offset = 3
1240
1241    def __init__(self, raw,
1242                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1243        raw._checkSeekable()
1244        BufferedReader.__init__(self, raw, buffer_size)
1245        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1246
1247    def seek(self, pos, whence=0):
1248        if not (0 <= whence <= 2):
1249            raise ValueError("invalid whence")
1250        self.flush()
1251        if self._read_buf:
1252            # Undo read ahead.
1253            with self._read_lock:
1254                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1255        # First do the raw seek, then empty the read buffer, so that
1256        # if the raw seek fails, we don't lose buffered data forever.
1257        pos = self.raw.seek(pos, whence)
1258        with self._read_lock:
1259            self._reset_read_buf()
1260        if pos < 0:
1261            raise IOError("seek() returned invalid position")
1262        return pos
1263
1264    def tell(self):
1265        if self._write_buf:
1266            return BufferedWriter.tell(self)
1267        else:
1268            return BufferedReader.tell(self)
1269
1270    def truncate(self, pos=None):
1271        if pos is None:
1272            pos = self.tell()
1273        # Use seek to flush the read buffer.
1274        return BufferedWriter.truncate(self, pos)
1275
1276    def read(self, n=None):
1277        if n is None:
1278            n = -1
1279        self.flush()
1280        return BufferedReader.read(self, n)
1281
1282    def readinto(self, b):
1283        self.flush()
1284        return BufferedReader.readinto(self, b)
1285
1286    def peek(self, n=0):
1287        self.flush()
1288        return BufferedReader.peek(self, n)
1289
1290    def read1(self, n):
1291        self.flush()
1292        return BufferedReader.read1(self, n)
1293
1294    def write(self, b):
1295        if self._read_buf:
1296            # Undo readahead
1297            with self._read_lock:
1298                self.raw.seek(self._read_pos - len(self._read_buf), 1)
1299                self._reset_read_buf()
1300        return BufferedWriter.write(self, b)
1301
1302
1303class TextIOBase(IOBase):
1304
1305    """Base class for text I/O.
1306
1307    This class provides a character and line based interface to stream
1308    I/O. There is no readinto method because Python's character strings
1309    are immutable. There is no public constructor.
1310    """
1311
1312    def read(self, n=-1):
1313        """Read at most n characters from stream.
1314
1315        Read from underlying buffer until we have n characters or we hit EOF.
1316        If n is negative or omitted, read until EOF.
1317        """
1318        self._unsupported("read")
1319
1320    def write(self, s):
1321        """Write string s to stream."""
1322        self._unsupported("write")
1323
1324    def truncate(self, pos=None):
1325        """Truncate size to pos."""
1326        self._unsupported("truncate")
1327
1328    def readline(self):
1329        """Read until newline or EOF.
1330
1331        Returns an empty string if EOF is hit immediately.
1332        """
1333        self._unsupported("readline")
1334
1335    def detach(self):
1336        """
1337        Separate the underlying buffer from the TextIOBase and return it.
1338
1339        After the underlying buffer has been detached, the TextIO is in an
1340        unusable state.
1341        """
1342        self._unsupported("detach")
1343
1344    @property
1345    def encoding(self):
1346        """Subclasses should override."""
1347        return None
1348
1349    @property
1350    def newlines(self):
1351        """Line endings translated so far.
1352
1353        Only line endings translated during reading are considered.
1354
1355        Subclasses should override.
1356        """
1357        return None
1358
1359    @property
1360    def errors(self):
1361        """Error setting of the decoder or encoder.
1362
1363        Subclasses should override."""
1364        return None
1365
1366io.TextIOBase.register(TextIOBase)
1367
1368
1369class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1370    r"""Codec used when reading a file in universal newlines mode.  It wraps
1371    another incremental decoder, translating \r\n and \r into \n.  It also
1372    records the types of newlines encountered.  When used with
1373    translate=False, it ensures that the newline sequence is returned in
1374    one piece.
1375    """
1376    def __init__(self, decoder, translate, errors='strict'):
1377        codecs.IncrementalDecoder.__init__(self, errors=errors)
1378        self.translate = translate
1379        self.decoder = decoder
1380        self.seennl = 0
1381        self.pendingcr = False
1382
1383    def decode(self, input, final=False):
1384        # decode input (with the eventual \r from a previous pass)
1385        if self.decoder is None:
1386            output = input
1387        else:
1388            output = self.decoder.decode(input, final=final)
1389        if self.pendingcr and (output or final):
1390            output = "\r" + output
1391            self.pendingcr = False
1392
1393        # retain last \r even when not translating data:
1394        # then readline() is sure to get \r\n in one pass
1395        if output.endswith("\r") and not final:
1396            output = output[:-1]
1397            self.pendingcr = True
1398
1399        # Record which newlines are read
1400        crlf = output.count('\r\n')
1401        cr = output.count('\r') - crlf
1402        lf = output.count('\n') - crlf
1403        self.seennl |= (lf and self._LF) | (cr and self._CR) \
1404                    | (crlf and self._CRLF)
1405
1406        if self.translate:
1407            if crlf:
1408                output = output.replace("\r\n", "\n")
1409            if cr:
1410                output = output.replace("\r", "\n")
1411
1412        return output
1413
1414    def getstate(self):
1415        if self.decoder is None:
1416            buf = b""
1417            flag = 0
1418        else:
1419            buf, flag = self.decoder.getstate()
1420        flag <<= 1
1421        if self.pendingcr:
1422            flag |= 1
1423        return buf, flag
1424
1425    def setstate(self, state):
1426        buf, flag = state
1427        self.pendingcr = bool(flag & 1)
1428        if self.decoder is not None:
1429            self.decoder.setstate((buf, flag >> 1))
1430
1431    def reset(self):
1432        self.seennl = 0
1433        self.pendingcr = False
1434        if self.decoder is not None:
1435            self.decoder.reset()
1436
1437    _LF = 1
1438    _CR = 2
1439    _CRLF = 4
1440
1441    @property
1442    def newlines(self):
1443        return (None,
1444                "\n",
1445                "\r",
1446                ("\r", "\n"),
1447                "\r\n",
1448                ("\n", "\r\n"),
1449                ("\r", "\r\n"),
1450                ("\r", "\n", "\r\n")
1451               )[self.seennl]
1452
1453
1454class TextIOWrapper(TextIOBase):
1455
1456    r"""Character and line based layer over a BufferedIOBase object, buffer.
1457
1458    encoding gives the name of the encoding that the stream will be
1459    decoded or encoded with. It defaults to locale.getpreferredencoding.
1460
1461    errors determines the strictness of encoding and decoding (see the
1462    codecs.register) and defaults to "strict".
1463
1464    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1465    handling of line endings. If it is None, universal newlines is
1466    enabled.  With this enabled, on input, the lines endings '\n', '\r',
1467    or '\r\n' are translated to '\n' before being returned to the
1468    caller. Conversely, on output, '\n' is translated to the system
1469    default line separator, os.linesep. If newline is any other of its
1470    legal values, that newline becomes the newline when the file is read
1471    and it is returned untranslated. On output, '\n' is converted to the
1472    newline.
1473
1474    If line_buffering is True, a call to flush is implied when a call to
1475    write contains a newline character.
1476    """
1477
1478    _CHUNK_SIZE = 2048
1479
1480    def __init__(self, buffer, encoding=None, errors=None, newline=None,
1481                 line_buffering=False):
1482        if newline is not None and not isinstance(newline, basestring):
1483            raise TypeError("illegal newline type: %r" % (type(newline),))
1484        if newline not in (None, "", "\n", "\r", "\r\n"):
1485            raise ValueError("illegal newline value: %r" % (newline,))
1486        if encoding is None:
1487            try:
1488                import locale
1489            except ImportError:
1490                # Importing locale may fail if Python is being built
1491                encoding = "ascii"
1492            else:
1493                encoding = locale.getpreferredencoding()
1494
1495        if not isinstance(encoding, basestring):
1496            raise ValueError("invalid encoding: %r" % encoding)
1497
1498        if errors is None:
1499            errors = "strict"
1500        else:
1501            if not isinstance(errors, basestring):
1502                raise ValueError("invalid errors: %r" % errors)
1503
1504        self._buffer = buffer
1505        self._line_buffering = line_buffering
1506        self._encoding = encoding
1507        self._errors = errors
1508        self._readuniversal = not newline
1509        self._readtranslate = newline is None
1510        self._readnl = newline
1511        self._writetranslate = newline != ''
1512        self._writenl = newline or os.linesep
1513        self._encoder = None
1514        self._decoder = None
1515        self._decoded_chars = ''  # buffer for text returned from decoder
1516        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1517        self._snapshot = None  # info for reconstructing decoder state
1518        self._seekable = self._telling = self.buffer.seekable()
1519
1520        if self._seekable and self.writable():
1521            position = self.buffer.tell()
1522            if position != 0:
1523                try:
1524                    self._get_encoder().setstate(0)
1525                except LookupError:
1526                    # Sometimes the encoder doesn't exist
1527                    pass
1528
1529    # self._snapshot is either None, or a tuple (dec_flags, next_input)
1530    # where dec_flags is the second (integer) item of the decoder state
1531    # and next_input is the chunk of input bytes that comes next after the
1532    # snapshot point.  We use this to reconstruct decoder states in tell().
1533
1534    # Naming convention:
1535    #   - "bytes_..." for integer variables that count input bytes
1536    #   - "chars_..." for integer variables that count decoded characters
1537
1538    def __repr__(self):
1539        try:
1540            name = self.name
1541        except Exception:
1542            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1543        else:
1544            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1545                name, self.encoding)
1546
1547    @property
1548    def encoding(self):
1549        return self._encoding
1550
1551    @property
1552    def errors(self):
1553        return self._errors
1554
1555    @property
1556    def line_buffering(self):
1557        return self._line_buffering
1558
1559    @property
1560    def buffer(self):
1561        return self._buffer
1562
1563    def seekable(self):
1564        if self.closed:
1565            raise ValueError("I/O operation on closed file.")
1566        return self._seekable
1567
1568    def readable(self):
1569        return self.buffer.readable()
1570
1571    def writable(self):
1572        return self.buffer.writable()
1573
1574    def flush(self):
1575        self.buffer.flush()
1576        self._telling = self._seekable
1577
1578    def close(self):
1579        if self.buffer is not None and not self.closed:
1580            try:
1581                self.flush()
1582            finally:
1583                self.buffer.close()
1584
1585    @property
1586    def closed(self):
1587        return self.buffer.closed
1588
1589    @property
1590    def name(self):
1591        return self.buffer.name
1592
1593    def fileno(self):
1594        return self.buffer.fileno()
1595
1596    def isatty(self):
1597        return self.buffer.isatty()
1598
1599    def write(self, s):
1600        if self.closed:
1601            raise ValueError("write to closed file")
1602        if not isinstance(s, unicode):
1603            raise TypeError("can't write %s to text stream" %
1604                            s.__class__.__name__)
1605        length = len(s)
1606        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1607        if haslf and self._writetranslate and self._writenl != "\n":
1608            s = s.replace("\n", self._writenl)
1609        encoder = self._encoder or self._get_encoder()
1610        # XXX What if we were just reading?
1611        b = encoder.encode(s)
1612        self.buffer.write(b)
1613        if self._line_buffering and (haslf or "\r" in s):
1614            self.flush()
1615        self._snapshot = None
1616        if self._decoder:
1617            self._decoder.reset()
1618        return length
1619
1620    def _get_encoder(self):
1621        make_encoder = codecs.getincrementalencoder(self._encoding)
1622        self._encoder = make_encoder(self._errors)
1623        return self._encoder
1624
1625    def _get_decoder(self):
1626        make_decoder = codecs.getincrementaldecoder(self._encoding)
1627        decoder = make_decoder(self._errors)
1628        if self._readuniversal:
1629            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1630        self._decoder = decoder
1631        return decoder
1632
1633    # The following three methods implement an ADT for _decoded_chars.
1634    # Text returned from the decoder is buffered here until the client
1635    # requests it by calling our read() or readline() method.
1636    def _set_decoded_chars(self, chars):
1637        """Set the _decoded_chars buffer."""
1638        self._decoded_chars = chars
1639        self._decoded_chars_used = 0
1640
1641    def _get_decoded_chars(self, n=None):
1642        """Advance into the _decoded_chars buffer."""
1643        offset = self._decoded_chars_used
1644        if n is None:
1645            chars = self._decoded_chars[offset:]
1646        else:
1647            chars = self._decoded_chars[offset:offset + n]
1648        self._decoded_chars_used += len(chars)
1649        return chars
1650
1651    def _rewind_decoded_chars(self, n):
1652        """Rewind the _decoded_chars buffer."""
1653        if self._decoded_chars_used < n:
1654            raise AssertionError("rewind decoded_chars out of bounds")
1655        self._decoded_chars_used -= n
1656
1657    def _read_chunk(self):
1658        """
1659        Read and decode the next chunk of data from the BufferedReader.
1660        """
1661
1662        # The return value is True unless EOF was reached.  The decoded
1663        # string is placed in self._decoded_chars (replacing its previous
1664        # value).  The entire input chunk is sent to the decoder, though
1665        # some of it may remain buffered in the decoder, yet to be
1666        # converted.
1667
1668        if self._decoder is None:
1669            raise ValueError("no decoder")
1670
1671        if self._telling:
1672            # To prepare for tell(), we need to snapshot a point in the
1673            # file where the decoder's input buffer is empty.
1674
1675            dec_buffer, dec_flags = self._decoder.getstate()
1676            # Given this, we know there was a valid snapshot point
1677            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1678
1679        # Read a chunk, decode it, and put the result in self._decoded_chars.
1680        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1681        eof = not input_chunk
1682        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1683
1684        if self._telling:
1685            # At the snapshot point, len(dec_buffer) bytes before the read,
1686            # the next input to be decoded is dec_buffer + input_chunk.
1687            self._snapshot = (dec_flags, dec_buffer + input_chunk)
1688
1689        return not eof
1690
1691    def _pack_cookie(self, position, dec_flags=0,
1692                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1693        # The meaning of a tell() cookie is: seek to position, set the
1694        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1695        # into the decoder with need_eof as the EOF flag, then skip
1696        # chars_to_skip characters of the decoded result.  For most simple
1697        # decoders, tell() will often just give a byte offset in the file.
1698        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1699               (chars_to_skip<<192) | bool(need_eof)<<256)
1700
1701    def _unpack_cookie(self, bigint):
1702        rest, position = divmod(bigint, 1<<64)
1703        rest, dec_flags = divmod(rest, 1<<64)
1704        rest, bytes_to_feed = divmod(rest, 1<<64)
1705        need_eof, chars_to_skip = divmod(rest, 1<<64)
1706        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1707
1708    def tell(self):
1709        if not self._seekable:
1710            raise IOError("underlying stream is not seekable")
1711        if not self._telling:
1712            raise IOError("telling position disabled by next() call")
1713        self.flush()
1714        position = self.buffer.tell()
1715        decoder = self._decoder
1716        if decoder is None or self._snapshot is None:
1717            if self._decoded_chars:
1718                # This should never happen.
1719                raise AssertionError("pending decoded text")
1720            return position
1721
1722        # Skip backward to the snapshot point (see _read_chunk).
1723        dec_flags, next_input = self._snapshot
1724        position -= len(next_input)
1725
1726        # How many decoded characters have been used up since the snapshot?
1727        chars_to_skip = self._decoded_chars_used
1728        if chars_to_skip == 0:
1729            # We haven't moved from the snapshot point.
1730            return self._pack_cookie(position, dec_flags)
1731
1732        # Starting from the snapshot position, we will walk the decoder
1733        # forward until it gives us enough decoded characters.
1734        saved_state = decoder.getstate()
1735        try:
1736            # Note our initial start point.
1737            decoder.setstate((b'', dec_flags))
1738            start_pos = position
1739            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1740            need_eof = 0
1741
1742            # Feed the decoder one byte at a time.  As we go, note the
1743            # nearest "safe start point" before the current location
1744            # (a point where the decoder has nothing buffered, so seek()
1745            # can safely start from there and advance to this location).
1746            for next_byte in next_input:
1747                bytes_fed += 1
1748                chars_decoded += len(decoder.decode(next_byte))
1749                dec_buffer, dec_flags = decoder.getstate()
1750                if not dec_buffer and chars_decoded <= chars_to_skip:
1751                    # Decoder buffer is empty, so this is a safe start point.
1752                    start_pos += bytes_fed
1753                    chars_to_skip -= chars_decoded
1754                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1755                if chars_decoded >= chars_to_skip:
1756                    break
1757            else:
1758                # We didn't get enough decoded data; signal EOF to get more.
1759                chars_decoded += len(decoder.decode(b'', final=True))
1760                need_eof = 1
1761                if chars_decoded < chars_to_skip:
1762                    raise IOError("can't reconstruct logical file position")
1763
1764            # The returned cookie corresponds to the last safe start point.
1765            return self._pack_cookie(
1766                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1767        finally:
1768            decoder.setstate(saved_state)
1769
1770    def truncate(self, pos=None):
1771        self.flush()
1772        if pos is None:
1773            pos = self.tell()
1774        return self.buffer.truncate(pos)
1775
1776    def detach(self):
1777        if self.buffer is None:
1778            raise ValueError("buffer is already detached")
1779        self.flush()
1780        buffer = self._buffer
1781        self._buffer = None
1782        return buffer
1783
1784    def seek(self, cookie, whence=0):
1785        if self.closed:
1786            raise ValueError("tell on closed file")
1787        if not self._seekable:
1788            raise IOError("underlying stream is not seekable")
1789        if whence == 1: # seek relative to current position
1790            if cookie != 0:
1791                raise IOError("can't do nonzero cur-relative seeks")
1792            # Seeking to the current position should attempt to
1793            # sync the underlying buffer with the current position.
1794            whence = 0
1795            cookie = self.tell()
1796        if whence == 2: # seek relative to end of file
1797            if cookie != 0:
1798                raise IOError("can't do nonzero end-relative seeks")
1799            self.flush()
1800            position = self.buffer.seek(0, 2)
1801            self._set_decoded_chars('')
1802            self._snapshot = None
1803            if self._decoder:
1804                self._decoder.reset()
1805            return position
1806        if whence != 0:
1807            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1808                             (whence,))
1809        if cookie < 0:
1810            raise ValueError("negative seek position %r" % (cookie,))
1811        self.flush()
1812
1813        # The strategy of seek() is to go back to the safe start point
1814        # and replay the effect of read(chars_to_skip) from there.
1815        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1816            self._unpack_cookie(cookie)
1817
1818        # Seek back to the safe start point.
1819        self.buffer.seek(start_pos)
1820        self._set_decoded_chars('')
1821        self._snapshot = None
1822
1823        # Restore the decoder to its state from the safe start point.
1824        if cookie == 0 and self._decoder:
1825            self._decoder.reset()
1826        elif self._decoder or dec_flags or chars_to_skip:
1827            self._decoder = self._decoder or self._get_decoder()
1828            self._decoder.setstate((b'', dec_flags))
1829            self._snapshot = (dec_flags, b'')
1830
1831        if chars_to_skip:
1832            # Just like _read_chunk, feed the decoder and save a snapshot.
1833            input_chunk = self.buffer.read(bytes_to_feed)
1834            self._set_decoded_chars(
1835                self._decoder.decode(input_chunk, need_eof))
1836            self._snapshot = (dec_flags, input_chunk)
1837
1838            # Skip chars_to_skip of the decoded characters.
1839            if len(self._decoded_chars) < chars_to_skip:
1840                raise IOError("can't restore logical file position")
1841            self._decoded_chars_used = chars_to_skip
1842
1843        # Finally, reset the encoder (merely useful for proper BOM handling)
1844        try:
1845            encoder = self._encoder or self._get_encoder()
1846        except LookupError:
1847            # Sometimes the encoder doesn't exist
1848            pass
1849        else:
1850            if cookie != 0:
1851                encoder.setstate(0)
1852            else:
1853                encoder.reset()
1854        return cookie
1855
1856    def read(self, n=None):
1857        self._checkReadable()
1858        if n is None:
1859            n = -1
1860        decoder = self._decoder or self._get_decoder()
1861        try:
1862            n.__index__
1863        except AttributeError:
1864            raise TypeError("an integer is required")
1865        if n < 0:
1866            # Read everything.
1867            result = (self._get_decoded_chars() +
1868                      decoder.decode(self.buffer.read(), final=True))
1869            self._set_decoded_chars('')
1870            self._snapshot = None
1871            return result
1872        else:
1873            # Keep reading chunks until we have n characters to return.
1874            eof = False
1875            result = self._get_decoded_chars(n)
1876            while len(result) < n and not eof:
1877                eof = not self._read_chunk()
1878                result += self._get_decoded_chars(n - len(result))
1879            return result
1880
1881    def next(self):
1882        self._telling = False
1883        line = self.readline()
1884        if not line:
1885            self._snapshot = None
1886            self._telling = self._seekable
1887            raise StopIteration
1888        return line
1889
1890    def readline(self, limit=None):
1891        if self.closed:
1892            raise ValueError("read from closed file")
1893        if limit is None:
1894            limit = -1
1895        elif not isinstance(limit, (int, long)):
1896            raise TypeError("limit must be an integer")
1897
1898        # Grab all the decoded text (we will rewind any extra bits later).
1899        line = self._get_decoded_chars()
1900
1901        start = 0
1902        # Make the decoder if it doesn't already exist.
1903        if not self._decoder:
1904            self._get_decoder()
1905
1906        pos = endpos = None
1907        while True:
1908            if self._readtranslate:
1909                # Newlines are already translated, only search for \n
1910                pos = line.find('\n', start)
1911                if pos >= 0:
1912                    endpos = pos + 1
1913                    break
1914                else:
1915                    start = len(line)
1916
1917            elif self._readuniversal:
1918                # Universal newline search. Find any of \r, \r\n, \n
1919                # The decoder ensures that \r\n are not split in two pieces
1920
1921                # In C we'd look for these in parallel of course.
1922                nlpos = line.find("\n", start)
1923                crpos = line.find("\r", start)
1924                if crpos == -1:
1925                    if nlpos == -1:
1926                        # Nothing found
1927                        start = len(line)
1928                    else:
1929                        # Found \n
1930                        endpos = nlpos + 1
1931                        break
1932                elif nlpos == -1:
1933                    # Found lone \r
1934                    endpos = crpos + 1
1935                    break
1936                elif nlpos < crpos:
1937                    # Found \n
1938                    endpos = nlpos + 1
1939                    break
1940                elif nlpos == crpos + 1:
1941                    # Found \r\n
1942                    endpos = crpos + 2
1943                    break
1944                else:
1945                    # Found \r
1946                    endpos = crpos + 1
1947                    break
1948            else:
1949                # non-universal
1950                pos = line.find(self._readnl)
1951                if pos >= 0:
1952                    endpos = pos + len(self._readnl)
1953                    break
1954
1955            if limit >= 0 and len(line) >= limit:
1956                endpos = limit  # reached length limit
1957                break
1958
1959            # No line ending seen yet - get more data'
1960            while self._read_chunk():
1961                if self._decoded_chars:
1962                    break
1963            if self._decoded_chars:
1964                line += self._get_decoded_chars()
1965            else:
1966                # end of file
1967                self._set_decoded_chars('')
1968                self._snapshot = None
1969                return line
1970
1971        if limit >= 0 and endpos > limit:
1972            endpos = limit  # don't exceed limit
1973
1974        # Rewind _decoded_chars to just after the line ending we found.
1975        self._rewind_decoded_chars(len(line) - endpos)
1976        return line[:endpos]
1977
1978    @property
1979    def newlines(self):
1980        return self._decoder.newlines if self._decoder else None
1981
1982
1983class StringIO(TextIOWrapper):
1984    """Text I/O implementation using an in-memory buffer.
1985
1986    The initial_value argument sets the value of object.  The newline
1987    argument is like the one of TextIOWrapper's constructor.
1988    """
1989
1990    def __init__(self, initial_value="", newline="\n"):
1991        super(StringIO, self).__init__(BytesIO(),
1992                                       encoding="utf-8",
1993                                       errors="strict",
1994                                       newline=newline)
1995        # Issue #5645: make universal newlines semantics the same as in the
1996        # C version, even under Windows.
1997        if newline is None:
1998            self._writetranslate = False
1999        if initial_value:
2000            if not isinstance(initial_value, unicode):
2001                initial_value = unicode(initial_value)
2002            self.write(initial_value)
2003            self.seek(0)
2004
2005    def getvalue(self):
2006        self.flush()
2007        decoder = self._decoder or self._get_decoder()
2008        old_state = decoder.getstate()
2009        decoder.reset()
2010        try:
2011            return decoder.decode(self.buffer.getvalue(), final=True)
2012        finally:
2013            decoder.setstate(old_state)
2014
2015    def __repr__(self):
2016        # TextIOWrapper tells the encoding in its repr. In StringIO,
2017        # that's a implementation detail.
2018        return object.__repr__(self)
2019
2020    @property
2021    def errors(self):
2022        return None
2023
2024    @property
2025    def encoding(self):
2026        return None
2027
2028    def detach(self):
2029        # This doesn't make sense on StringIO.
2030        self._unsupported("detach")
2031