1/*
2    An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6    Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
/* Class docstring for _io._TextIOBase; installed as tp_doc of
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
23
24static PyObject *
25_unsupported(const char *message)
26{
27    PyErr_SetString(_PyIO_unsupported_operation, message);
28    return NULL;
29}
30
31PyDoc_STRVAR(textiobase_detach_doc,
32    "Separate the underlying buffer from the TextIOBase and return it.\n"
33    "\n"
34    "After the underlying buffer has been detached, the TextIO is in an\n"
35    "unusable state.\n"
36    );
37
38static PyObject *
39textiobase_detach(PyObject *self)
40{
41    return _unsupported("detach");
42}
43
44PyDoc_STRVAR(textiobase_read_doc,
45    "Read at most n characters from stream.\n"
46    "\n"
47    "Read from underlying buffer until we have n characters or we hit EOF.\n"
48    "If n is negative or omitted, read until EOF.\n"
49    );
50
51static PyObject *
52textiobase_read(PyObject *self, PyObject *args)
53{
54    return _unsupported("read");
55}
56
57PyDoc_STRVAR(textiobase_readline_doc,
58    "Read until newline or EOF.\n"
59    "\n"
60    "Returns an empty string if EOF is hit immediately.\n"
61    );
62
63static PyObject *
64textiobase_readline(PyObject *self, PyObject *args)
65{
66    return _unsupported("readline");
67}
68
69PyDoc_STRVAR(textiobase_write_doc,
70    "Write string to stream.\n"
71    "Returns the number of characters written (which is always equal to\n"
72    "the length of the string).\n"
73    );
74
75static PyObject *
76textiobase_write(PyObject *self, PyObject *args)
77{
78    return _unsupported("write");
79}
80
81PyDoc_STRVAR(textiobase_encoding_doc,
82    "Encoding of the text stream.\n"
83    "\n"
84    "Subclasses should override.\n"
85    );
86
87static PyObject *
88textiobase_encoding_get(PyObject *self, void *context)
89{
90    Py_RETURN_NONE;
91}
92
93PyDoc_STRVAR(textiobase_newlines_doc,
94    "Line endings translated so far.\n"
95    "\n"
96    "Only line endings translated during reading are considered.\n"
97    "\n"
98    "Subclasses should override.\n"
99    );
100
101static PyObject *
102textiobase_newlines_get(PyObject *self, void *context)
103{
104    Py_RETURN_NONE;
105}
106
107PyDoc_STRVAR(textiobase_errors_doc,
108    "The error setting of the decoder or encoder.\n"
109    "\n"
110    "Subclasses should override.\n"
111    );
112
113static PyObject *
114textiobase_errors_get(PyObject *self, void *context)
115{
116    Py_RETURN_NONE;
117}
118
119
120static PyMethodDef textiobase_methods[] = {
121    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125    {NULL, NULL}
126};
127
128static PyGetSetDef textiobase_getset[] = {
129    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132    {NULL}
133};
134
/* Type object for _io._TextIOBase.
 *
 * The initializer is positional, so slot order matters.  The only slots
 * that are filled in are the flags (default + subclassable), the class
 * docstring, the method and getset tables, and the base type
 * (PyIOBase_Type).  tp_basicsize, tp_dealloc, tp_init and tp_new are
 * left as 0 here.
 */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
175
176
177/* IncrementalNewlineDecoder */
178
/* Class docstring for _io.IncrementalNewlineDecoder; installed as tp_doc
   of PyIncrementalNewlineDecoder_Type. */
PyDoc_STRVAR(incrementalnewlinedecoder_doc,
    "Codec used when reading a file in universal newlines mode.  It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
    "records the types of newlines encountered.  When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
188
189typedef struct {
190    PyObject_HEAD
191    PyObject *decoder;
192    PyObject *errors;
193    signed int pendingcr: 1;
194    signed int translate: 1;
195    unsigned int seennl: 3;
196} nldecoder_object;
197
198static int
199incrementalnewlinedecoder_init(nldecoder_object *self,
200                               PyObject *args, PyObject *kwds)
201{
202    PyObject *decoder;
203    int translate;
204    PyObject *errors = NULL;
205    char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207    if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208                                     kwlist, &decoder, &translate, &errors))
209        return -1;
210
211    self->decoder = decoder;
212    Py_INCREF(decoder);
213
214    if (errors == NULL) {
215        self->errors = PyUnicode_FromString("strict");
216        if (self->errors == NULL)
217            return -1;
218    }
219    else {
220        Py_INCREF(errors);
221        self->errors = errors;
222    }
223
224    self->translate = translate;
225    self->seennl = 0;
226    self->pendingcr = 0;
227
228    return 0;
229}
230
231static void
232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233{
234    Py_CLEAR(self->decoder);
235    Py_CLEAR(self->errors);
236    Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags stored in nldecoder_object.seennl, one per newline style that
   has been encountered; SEEN_ALL is the union of the three. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
/* Decode `input` through the wrapped decoder, record which newline styles
 * occur, and (when self->translate is set) rewrite "\r" and "\r\n" to "\n".
 *
 *   _self - an nldecoder_object (cast without a type check).
 *   input - passed to the wrapped decoder's decode(); when the wrapped
 *           decoder is Py_None, `input` itself is used as the decoded text.
 *   final - non-zero when no more input will follow.
 *
 * Returns a new reference to a unicode object, or NULL with an exception
 * set.  Raises ValueError if __init__ was never called (decoder is NULL)
 * and TypeError if the decoded result is not a unicode object.
 *
 * A trailing '\r' of a non-final chunk is held back (self->pendingcr) and
 * prepended to the next chunk, so "\r\n" is never split across calls.
 *
 * NOTE: the scanning loops below stop by reading one element past the
 * last character and testing `> end`; they therefore depend on the
 * unicode buffer being followed by a terminating element that is not
 * greater than '\n' / '\r' (presumably the NUL terminator -- confirm
 * against the unicode object implementation).
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* Decode the input.  (A '\r' held back by a previous pass is
       prepended further down.) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input is taken as already-decoded text. */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    output_len = PyUnicode_GET_SIZE(output);
    /* Re-insert the held-back '\r' in front of the new text.  If this
       chunk is empty and not final, the '\r' simply stays pending. */
    if (self->pendingcr && (final || output_len > 0)) {
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* Shrink in place only when we hold the sole reference;
               otherwise build a shortened copy. */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        /* Nothing to scan or translate. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            /* The memchr looks for the '\n' *byte*, which can also match
               part of a wider code unit, hence the exact loop after it. */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    /* s has stepped past the element at in_str[len]. */
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Not translating: only record the newline styles present. */
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* "\r\n" counts as one CRLF, not a CR plus an LF. */
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translating: copy characters from `in` to `out`, collapsing
               "\r\n" and lone "\r" into "\n".  The result is never longer
               than the input, so the rewrite can be done in place. */
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            out_str = PyUnicode_AS_UNICODE(translated);
            /* Reading still comes from the original buffer (in_str);
               writing goes to `translated`, which may be the same object. */
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                /* `in` has stepped past the element at in_str[len]. */
                if (in > end)
                    break;
                /* Any other character <= '\r' is copied unchanged. */
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* Trim the tail left over from collapsed "\r\n" pairs. */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
461
462static PyObject *
463incrementalnewlinedecoder_decode(nldecoder_object *self,
464                                 PyObject *args, PyObject *kwds)
465{
466    char *kwlist[] = {"input", "final", NULL};
467    PyObject *input;
468    int final = 0;
469
470    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471                                     kwlist, &input, &final))
472        return NULL;
473    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
478{
479    PyObject *buffer;
480    unsigned PY_LONG_LONG flag;
481
482    if (self->decoder != Py_None) {
483        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484           _PyIO_str_getstate, NULL);
485        if (state == NULL)
486            return NULL;
487        if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488            Py_DECREF(state);
489            return NULL;
490        }
491        Py_INCREF(buffer);
492        Py_DECREF(state);
493    }
494    else {
495        buffer = PyBytes_FromString("");
496        flag = 0;
497    }
498    flag <<= 1;
499    if (self->pendingcr)
500        flag |= 1;
501    return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
506{
507    PyObject *buffer;
508    unsigned PY_LONG_LONG flag;
509
510    if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511        return NULL;
512
513    self->pendingcr = (int) flag & 1;
514    flag >>= 1;
515
516    if (self->decoder != Py_None)
517        return PyObject_CallMethod(self->decoder,
518                                   "setstate", "((OK))", buffer, flag);
519    else
520        Py_RETURN_NONE;
521}
522
523static PyObject *
524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
525{
526    self->seennl = 0;
527    self->pendingcr = 0;
528    if (self->decoder != Py_None)
529        return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530    else
531        Py_RETURN_NONE;
532}
533
534static PyObject *
535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
536{
537    switch (self->seennl) {
538    case SEEN_CR:
539        return PyUnicode_FromString("\r");
540    case SEEN_LF:
541        return PyUnicode_FromString("\n");
542    case SEEN_CRLF:
543        return PyUnicode_FromString("\r\n");
544    case SEEN_CR | SEEN_LF:
545        return Py_BuildValue("ss", "\r", "\n");
546    case SEEN_CR | SEEN_CRLF:
547        return Py_BuildValue("ss", "\r", "\r\n");
548    case SEEN_LF | SEEN_CRLF:
549        return Py_BuildValue("ss", "\n", "\r\n");
550    case SEEN_CR | SEEN_LF | SEEN_CRLF:
551        return Py_BuildValue("sss", "\r", "\n", "\r\n");
552    default:
553        Py_RETURN_NONE;
554   }
555
556}
557
558
559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560    {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561    {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562    {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563    {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
564    {NULL}
565};
566
567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
569    {NULL}
570};
571
/* Type object for _io.IncrementalNewlineDecoder.
 *
 * The initializer is positional, so slot order matters.  Instances are
 * nldecoder_object structs, created with PyType_GenericNew, initialized
 * by incrementalnewlinedecoder_init and released by
 * incrementalnewlinedecoder_dealloc.  The type is subclassable
 * (Py_TPFLAGS_BASETYPE) and sets no explicit base (tp_base is 0).
 */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
612
613
614/* TextIOWrapper */
615
616PyDoc_STRVAR(textiowrapper_doc,
617    "Character and line based layer over a BufferedIOBase object, buffer.\n"
618    "\n"
619    "encoding gives the name of the encoding that the stream will be\n"
620    "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621    "\n"
622    "errors determines the strictness of encoding and decoding (see the\n"
623    "codecs.register) and defaults to \"strict\".\n"
624    "\n"
625    "newline can be None, '', '\\n', '\\r', or '\\r\\n'.  It controls the\n"
626    "handling of line endings. If it is None, universal newlines is\n"
627    "enabled.  With this enabled, on input, the lines endings '\\n', '\\r',\n"
628    "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629    "caller. Conversely, on output, '\\n' is translated to the system\n"
630    "default line seperator, os.linesep. If newline is any other of its\n"
631    "legal values, that newline becomes the newline when the file is read\n"
632    "and it is returned untranslated. On output, '\\n' is converted to the\n"
633    "newline.\n"
634    "\n"
635    "If line_buffering is True, a call to flush is implied when a call to\n"
636    "write contains a newline character."
637    );
638
/* Signature of the specialized encoding helpers stored in
   textio.encodefunc and in the encodefuncs table: two object arguments,
   returning an object pointer.  The helpers defined in this file take a
   `textio *` as first argument and are cast to this type. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
641
/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
693
694
695/* A couple of specialized cases in order to bypass the slow incremental
696   encoding methods for the most popular encodings. */
697
698static PyObject *
699ascii_encode(textio *self, PyObject *text)
700{
701    return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
702                                 PyUnicode_GET_SIZE(text),
703                                 PyBytes_AS_STRING(self->errors));
704}
705
706static PyObject *
707utf16be_encode(textio *self, PyObject *text)
708{
709    return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
710                                 PyUnicode_GET_SIZE(text),
711                                 PyBytes_AS_STRING(self->errors), 1);
712}
713
714static PyObject *
715utf16le_encode(textio *self, PyObject *text)
716{
717    return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
718                                 PyUnicode_GET_SIZE(text),
719                                 PyBytes_AS_STRING(self->errors), -1);
720}
721
722static PyObject *
723utf16_encode(textio *self, PyObject *text)
724{
725    if (!self->encoding_start_of_stream) {
726        /* Skip the BOM and use native byte ordering */
727#if defined(WORDS_BIGENDIAN)
728        return utf16be_encode(self, text);
729#else
730        return utf16le_encode(self, text);
731#endif
732    }
733    return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734                                 PyUnicode_GET_SIZE(text),
735                                 PyBytes_AS_STRING(self->errors), 0);
736}
737
738static PyObject *
739utf32be_encode(textio *self, PyObject *text)
740{
741    return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
742                                 PyUnicode_GET_SIZE(text),
743                                 PyBytes_AS_STRING(self->errors), 1);
744}
745
746static PyObject *
747utf32le_encode(textio *self, PyObject *text)
748{
749    return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
750                                 PyUnicode_GET_SIZE(text),
751                                 PyBytes_AS_STRING(self->errors), -1);
752}
753
754static PyObject *
755utf32_encode(textio *self, PyObject *text)
756{
757    if (!self->encoding_start_of_stream) {
758        /* Skip the BOM and use native byte ordering */
759#if defined(WORDS_BIGENDIAN)
760        return utf32be_encode(self, text);
761#else
762        return utf32le_encode(self, text);
763#endif
764    }
765    return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766                                 PyUnicode_GET_SIZE(text),
767                                 PyBytes_AS_STRING(self->errors), 0);
768}
769
770static PyObject *
771utf8_encode(textio *self, PyObject *text)
772{
773    return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
774                                PyUnicode_GET_SIZE(text),
775                                PyBytes_AS_STRING(self->errors));
776}
777
778static PyObject *
779latin1_encode(textio *self, PyObject *text)
780{
781    return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
782                                  PyUnicode_GET_SIZE(text),
783                                  PyBytes_AS_STRING(self->errors));
784}
785
786/* Map normalized encoding names onto the specialized encoding funcs */
787
/* One row of the encodefuncs table: a codec name paired with the
   specialized encoding helper to use for it. */
typedef struct {
    const char *name;
    encodefunc_t encodefunc;
} encodefuncentry;
792
793static encodefuncentry encodefuncs[] = {
794    {"ascii",       (encodefunc_t) ascii_encode},
795    {"iso8859-1",   (encodefunc_t) latin1_encode},
796    {"utf-8",       (encodefunc_t) utf8_encode},
797    {"utf-16-be",   (encodefunc_t) utf16be_encode},
798    {"utf-16-le",   (encodefunc_t) utf16le_encode},
799    {"utf-16",      (encodefunc_t) utf16_encode},
800    {"utf-32-be",   (encodefunc_t) utf32be_encode},
801    {"utf-32-le",   (encodefunc_t) utf32le_encode},
802    {"utf-32",      (encodefunc_t) utf32_encode},
803    {NULL, NULL}
804};
805
806
807static int
808textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
809{
810    char *kwlist[] = {"buffer", "encoding", "errors",
811                      "newline", "line_buffering",
812                      NULL};
813    PyObject *buffer, *raw;
814    char *encoding = NULL;
815    char *errors = NULL;
816    char *newline = NULL;
817    int line_buffering = 0;
818
819    PyObject *res;
820    int r;
821
822    self->ok = 0;
823    self->detached = 0;
824    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
825                                     kwlist, &buffer, &encoding, &errors,
826                                     &newline, &line_buffering))
827        return -1;
828
829    if (newline && newline[0] != '\0'
830        && !(newline[0] == '\n' && newline[1] == '\0')
831        && !(newline[0] == '\r' && newline[1] == '\0')
832        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
833        PyErr_Format(PyExc_ValueError,
834                     "illegal newline value: %s", newline);
835        return -1;
836    }
837
838    Py_CLEAR(self->buffer);
839    Py_CLEAR(self->encoding);
840    Py_CLEAR(self->encoder);
841    Py_CLEAR(self->decoder);
842    Py_CLEAR(self->readnl);
843    Py_CLEAR(self->decoded_chars);
844    Py_CLEAR(self->pending_bytes);
845    Py_CLEAR(self->snapshot);
846    Py_CLEAR(self->errors);
847    Py_CLEAR(self->raw);
848    self->decoded_chars_used = 0;
849    self->pending_bytes_count = 0;
850    self->encodefunc = NULL;
851    self->writenl = NULL;
852
853    if (encoding == NULL && self->encoding == NULL) {
854        if (_PyIO_locale_module == NULL) {
855            _PyIO_locale_module = PyImport_ImportModule("locale");
856            if (_PyIO_locale_module == NULL)
857                goto catch_ImportError;
858            else
859                goto use_locale;
860        }
861        else {
862          use_locale:
863            self->encoding = PyObject_CallMethod(
864                _PyIO_locale_module, "getpreferredencoding", NULL);
865            if (self->encoding == NULL) {
866              catch_ImportError:
867                /*
868                 Importing locale can raise a ImportError because of
869                 _functools, and locale.getpreferredencoding can raise a
870                 ImportError if _locale is not available.  These will happen
871                 during module building.
872                */
873                if (PyErr_ExceptionMatches(PyExc_ImportError)) {
874                    PyErr_Clear();
875                    self->encoding = PyString_FromString("ascii");
876                }
877                else
878                    goto error;
879            }
880            else if (!PyString_Check(self->encoding))
881                Py_CLEAR(self->encoding);
882        }
883    }
884    if (self->encoding != NULL)
885        encoding = PyString_AsString(self->encoding);
886    else if (encoding != NULL) {
887        self->encoding = PyString_FromString(encoding);
888        if (self->encoding == NULL)
889            goto error;
890    }
891    else {
892        PyErr_SetString(PyExc_IOError,
893                        "could not determine default encoding");
894    }
895
896    if (errors == NULL)
897        errors = "strict";
898    self->errors = PyBytes_FromString(errors);
899    if (self->errors == NULL)
900        goto error;
901
902    self->chunk_size = 8192;
903    self->readuniversal = (newline == NULL || newline[0] == '\0');
904    self->line_buffering = line_buffering;
905    self->readtranslate = (newline == NULL);
906    if (newline) {
907        self->readnl = PyString_FromString(newline);
908        if (self->readnl == NULL)
909            return -1;
910    }
911    self->writetranslate = (newline == NULL || newline[0] != '\0');
912    if (!self->readuniversal && self->writetranslate) {
913        self->writenl = PyString_AsString(self->readnl);
914        if (!strcmp(self->writenl, "\n"))
915            self->writenl = NULL;
916    }
917#ifdef MS_WINDOWS
918    else
919        self->writenl = "\r\n";
920#endif
921
922    /* Build the decoder object */
923    res = PyObject_CallMethod(buffer, "readable", NULL);
924    if (res == NULL)
925        goto error;
926    r = PyObject_IsTrue(res);
927    Py_DECREF(res);
928    if (r == -1)
929        goto error;
930    if (r == 1) {
931        self->decoder = PyCodec_IncrementalDecoder(
932            encoding, errors);
933        if (self->decoder == NULL)
934            goto error;
935
936        if (self->readuniversal) {
937            PyObject *incrementalDecoder = PyObject_CallFunction(
938                (PyObject *)&PyIncrementalNewlineDecoder_Type,
939                "Oi", self->decoder, (int)self->readtranslate);
940            if (incrementalDecoder == NULL)
941                goto error;
942            Py_CLEAR(self->decoder);
943            self->decoder = incrementalDecoder;
944        }
945    }
946
947    /* Build the encoder object */
948    res = PyObject_CallMethod(buffer, "writable", NULL);
949    if (res == NULL)
950        goto error;
951    r = PyObject_IsTrue(res);
952    Py_DECREF(res);
953    if (r == -1)
954        goto error;
955    if (r == 1) {
956        PyObject *ci;
957        self->encoder = PyCodec_IncrementalEncoder(
958            encoding, errors);
959        if (self->encoder == NULL)
960            goto error;
961        /* Get the normalized named of the codec */
962        ci = _PyCodec_Lookup(encoding);
963        if (ci == NULL)
964            goto error;
965        res = PyObject_GetAttrString(ci, "name");
966        Py_DECREF(ci);
967        if (res == NULL) {
968            if (PyErr_ExceptionMatches(PyExc_AttributeError))
969                PyErr_Clear();
970            else
971                goto error;
972        }
973        else if (PyString_Check(res)) {
974            encodefuncentry *e = encodefuncs;
975            while (e->name != NULL) {
976                if (!strcmp(PyString_AS_STRING(res), e->name)) {
977                    self->encodefunc = e->encodefunc;
978                    break;
979                }
980                e++;
981            }
982        }
983        Py_XDECREF(res);
984    }
985
986    self->buffer = buffer;
987    Py_INCREF(buffer);
988
989    if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
990        Py_TYPE(buffer) == &PyBufferedWriter_Type ||
991        Py_TYPE(buffer) == &PyBufferedRandom_Type) {
992        raw = PyObject_GetAttrString(buffer, "raw");
993        /* Cache the raw FileIO object to speed up 'closed' checks */
994        if (raw == NULL) {
995            if (PyErr_ExceptionMatches(PyExc_AttributeError))
996                PyErr_Clear();
997            else
998                goto error;
999        }
1000        else if (Py_TYPE(raw) == &PyFileIO_Type)
1001            self->raw = raw;
1002        else
1003            Py_DECREF(raw);
1004    }
1005
1006    res = PyObject_CallMethod(buffer, "seekable", NULL);
1007    if (res == NULL)
1008        goto error;
1009    self->seekable = self->telling = PyObject_IsTrue(res);
1010    Py_DECREF(res);
1011
1012    self->encoding_start_of_stream = 0;
1013    if (self->seekable && self->encoder) {
1014        PyObject *cookieObj;
1015        int cmp;
1016
1017        self->encoding_start_of_stream = 1;
1018
1019        cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1020        if (cookieObj == NULL)
1021            goto error;
1022
1023        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1024        Py_DECREF(cookieObj);
1025        if (cmp < 0) {
1026            goto error;
1027        }
1028
1029        if (cmp == 0) {
1030            self->encoding_start_of_stream = 0;
1031            res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1032                                             _PyIO_zero, NULL);
1033            if (res == NULL)
1034                goto error;
1035            Py_DECREF(res);
1036        }
1037    }
1038
1039    self->ok = 1;
1040    return 0;
1041
1042  error:
1043    return -1;
1044}
1045
1046static int
1047_textiowrapper_clear(textio *self)
1048{
1049    if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1050        return -1;
1051    self->ok = 0;
1052    Py_CLEAR(self->buffer);
1053    Py_CLEAR(self->encoding);
1054    Py_CLEAR(self->encoder);
1055    Py_CLEAR(self->decoder);
1056    Py_CLEAR(self->readnl);
1057    Py_CLEAR(self->decoded_chars);
1058    Py_CLEAR(self->pending_bytes);
1059    Py_CLEAR(self->snapshot);
1060    Py_CLEAR(self->errors);
1061    Py_CLEAR(self->raw);
1062    return 0;
1063}
1064
1065static void
1066textiowrapper_dealloc(textio *self)
1067{
1068    if (_textiowrapper_clear(self) < 0)
1069        return;
1070    _PyObject_GC_UNTRACK(self);
1071    if (self->weakreflist != NULL)
1072        PyObject_ClearWeakRefs((PyObject *)self);
1073    Py_CLEAR(self->dict);
1074    Py_TYPE(self)->tp_free((PyObject *)self);
1075}
1076
/* tp_traverse slot: report every object reference held by the wrapper to
 * the cyclic garbage collector.  Py_VISIT returns early from this function
 * with the visitor's result if the visitor returns a non-zero value;
 * otherwise 0 is returned once all members have been visited.
 */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    /* The same members that _textiowrapper_clear() releases. */
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    /* The instance dict is cleared separately by the tp_clear/tp_dealloc
       slots, but must still be traversed. */
    Py_VISIT(self->dict);
    return 0;
}
1094
1095static int
1096textiowrapper_clear(textio *self)
1097{
1098    if (_textiowrapper_clear(self) < 0)
1099        return -1;
1100    Py_CLEAR(self->dict);
1101    return 0;
1102}
1103
1104static PyObject *
1105textiowrapper_closed_get(textio *self, void *context);
1106
1107/* This macro takes some shortcuts to make the common case faster. */
1108#define CHECK_CLOSED(self) \
1109    do { \
1110        int r; \
1111        PyObject *_res; \
1112        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1113            if (self->raw != NULL) \
1114                r = _PyFileIO_closed(self->raw); \
1115            else { \
1116                _res = textiowrapper_closed_get(self, NULL); \
1117                if (_res == NULL) \
1118                    return NULL; \
1119                r = PyObject_IsTrue(_res); \
1120                Py_DECREF(_res); \
1121                if (r < 0) \
1122                    return NULL; \
1123            } \
1124            if (r > 0) { \
1125                PyErr_SetString(PyExc_ValueError, \
1126                                "I/O operation on closed file."); \
1127                return NULL; \
1128            } \
1129        } \
1130        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1131            return NULL; \
1132    } while (0)
1133
/* Return NULL from the calling function with a ValueError unless the
   wrapper is usable (self->ok > 0).  The message tells a detached wrapper
   apart from one that was never initialized.  Note that this expands to a
   bare `if` statement, not a do/while block. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            self->detached \
                ? "underlying buffer has been detached" \
                : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1145
/* Variant of the initialization check for functions that return int:
   sets ValueError and returns -1 unless the wrapper is usable
   (self->ok > 0).  Expands to a bare `if` statement. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            self->detached \
                ? "underlying buffer has been detached" \
                : "I/O operation on uninitialized object"); \
        return -1; \
    }
1157
1158
1159static PyObject *
1160textiowrapper_detach(textio *self)
1161{
1162    PyObject *buffer, *res;
1163    CHECK_INITIALIZED(self);
1164    res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1165    if (res == NULL)
1166        return NULL;
1167    Py_DECREF(res);
1168    buffer = self->buffer;
1169    self->buffer = NULL;
1170    self->detached = 1;
1171    self->ok = 0;
1172    return buffer;
1173}
1174
1175Py_LOCAL_INLINE(const Py_UNICODE *)
1176findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1177{
1178    /* like wcschr, but doesn't stop at NULL characters */
1179    while (size-- > 0) {
1180        if (*s == ch)
1181            return s;
1182        s++;
1183    }
1184    return NULL;
1185}
1186
1187/* Flush the internal write buffer. This doesn't explicitly flush the
1188   underlying buffered object, though. */
1189static int
1190_textiowrapper_writeflush(textio *self)
1191{
1192    PyObject *pending, *b, *ret;
1193
1194    if (self->pending_bytes == NULL)
1195        return 0;
1196
1197    pending = self->pending_bytes;
1198    Py_INCREF(pending);
1199    self->pending_bytes_count = 0;
1200    Py_CLEAR(self->pending_bytes);
1201
1202    b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1203    Py_DECREF(pending);
1204    if (b == NULL)
1205        return -1;
1206    ret = PyObject_CallMethodObjArgs(self->buffer,
1207                                     _PyIO_str_write, b, NULL);
1208    Py_DECREF(b);
1209    if (ret == NULL)
1210        return -1;
1211    Py_DECREF(ret);
1212    return 0;
1213}
1214
1215static PyObject *
1216textiowrapper_write(textio *self, PyObject *args)
1217{
1218    PyObject *ret;
1219    PyObject *text; /* owned reference */
1220    PyObject *b;
1221    Py_ssize_t textlen;
1222    int haslf = 0;
1223    int needflush = 0;
1224
1225    CHECK_INITIALIZED(self);
1226
1227    if (!PyArg_ParseTuple(args, "U:write", &text)) {
1228        return NULL;
1229    }
1230
1231    CHECK_CLOSED(self);
1232
1233    if (self->encoder == NULL) {
1234        PyErr_SetString(PyExc_IOError, "not writable");
1235        return NULL;
1236    }
1237
1238    Py_INCREF(text);
1239
1240    textlen = PyUnicode_GetSize(text);
1241
1242    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1243        if (findchar(PyUnicode_AS_UNICODE(text),
1244                     PyUnicode_GET_SIZE(text), '\n'))
1245            haslf = 1;
1246
1247    if (haslf && self->writetranslate && self->writenl != NULL) {
1248        PyObject *newtext = PyObject_CallMethod(
1249            text, "replace", "ss", "\n", self->writenl);
1250        Py_DECREF(text);
1251        if (newtext == NULL)
1252            return NULL;
1253        text = newtext;
1254    }
1255
1256    if (self->line_buffering &&
1257        (haslf ||
1258         findchar(PyUnicode_AS_UNICODE(text),
1259                  PyUnicode_GET_SIZE(text), '\r')))
1260        needflush = 1;
1261
1262    /* XXX What if we were just reading? */
1263    if (self->encodefunc != NULL) {
1264        b = (*self->encodefunc)((PyObject *) self, text);
1265        self->encoding_start_of_stream = 0;
1266    }
1267    else
1268        b = PyObject_CallMethodObjArgs(self->encoder,
1269                                       _PyIO_str_encode, text, NULL);
1270    Py_DECREF(text);
1271    if (b == NULL)
1272        return NULL;
1273
1274    if (self->pending_bytes == NULL) {
1275        self->pending_bytes = PyList_New(0);
1276        if (self->pending_bytes == NULL) {
1277            Py_DECREF(b);
1278            return NULL;
1279        }
1280        self->pending_bytes_count = 0;
1281    }
1282    if (PyList_Append(self->pending_bytes, b) < 0) {
1283        Py_DECREF(b);
1284        return NULL;
1285    }
1286    self->pending_bytes_count += PyBytes_GET_SIZE(b);
1287    Py_DECREF(b);
1288    if (self->pending_bytes_count > self->chunk_size || needflush) {
1289        if (_textiowrapper_writeflush(self) < 0)
1290            return NULL;
1291    }
1292
1293    if (needflush) {
1294        ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1295        if (ret == NULL)
1296            return NULL;
1297        Py_DECREF(ret);
1298    }
1299
1300    Py_CLEAR(self->snapshot);
1301
1302    if (self->decoder) {
1303        ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1304        if (ret == NULL)
1305            return NULL;
1306        Py_DECREF(ret);
1307    }
1308
1309    return PyLong_FromSsize_t(textlen);
1310}
1311
1312/* Steal a reference to chars and store it in the decoded_char buffer;
1313 */
1314static void
1315textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1316{
1317    Py_CLEAR(self->decoded_chars);
1318    self->decoded_chars = chars;
1319    self->decoded_chars_used = 0;
1320}
1321
1322static PyObject *
1323textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1324{
1325    PyObject *chars;
1326    Py_ssize_t avail;
1327
1328    if (self->decoded_chars == NULL)
1329        return PyUnicode_FromStringAndSize(NULL, 0);
1330
1331    avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1332             - self->decoded_chars_used);
1333
1334    assert(avail >= 0);
1335
1336    if (n < 0 || n > avail)
1337        n = avail;
1338
1339    if (self->decoded_chars_used > 0 || n < avail) {
1340        chars = PyUnicode_FromUnicode(
1341            PyUnicode_AS_UNICODE(self->decoded_chars)
1342            + self->decoded_chars_used, n);
1343        if (chars == NULL)
1344            return NULL;
1345    }
1346    else {
1347        chars = self->decoded_chars;
1348        Py_INCREF(chars);
1349    }
1350
1351    self->decoded_chars_used += n;
1352    return chars;
1353}
1354
1355/* Read and decode the next chunk of data from the BufferedReader.
1356 */
1357static int
1358textiowrapper_read_chunk(textio *self)
1359{
1360    PyObject *dec_buffer = NULL;
1361    PyObject *dec_flags = NULL;
1362    PyObject *input_chunk = NULL;
1363    PyObject *decoded_chars, *chunk_size;
1364    int eof;
1365
1366    /* The return value is True unless EOF was reached.  The decoded string is
1367     * placed in self._decoded_chars (replacing its previous value).  The
1368     * entire input chunk is sent to the decoder, though some of it may remain
1369     * buffered in the decoder, yet to be converted.
1370     */
1371
1372    if (self->decoder == NULL) {
1373        PyErr_SetString(PyExc_IOError, "not readable");
1374        return -1;
1375    }
1376
1377    if (self->telling) {
1378        /* To prepare for tell(), we need to snapshot a point in the file
1379         * where the decoder's input buffer is empty.
1380         */
1381
1382        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1383                                                     _PyIO_str_getstate, NULL);
1384        if (state == NULL)
1385            return -1;
1386        /* Given this, we know there was a valid snapshot point
1387         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1388         */
1389        if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1390            Py_DECREF(state);
1391            return -1;
1392        }
1393        Py_INCREF(dec_buffer);
1394        Py_INCREF(dec_flags);
1395        Py_DECREF(state);
1396    }
1397
1398    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1399    chunk_size = PyLong_FromSsize_t(self->chunk_size);
1400    if (chunk_size == NULL)
1401        goto fail;
1402    input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1403        _PyIO_str_read1, chunk_size, NULL);
1404    Py_DECREF(chunk_size);
1405    if (input_chunk == NULL)
1406        goto fail;
1407    assert(PyBytes_Check(input_chunk));
1408
1409    eof = (PyBytes_Size(input_chunk) == 0);
1410
1411    if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1412        decoded_chars = _PyIncrementalNewlineDecoder_decode(
1413            self->decoder, input_chunk, eof);
1414    }
1415    else {
1416        decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1417            _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1418    }
1419
1420    /* TODO sanity check: isinstance(decoded_chars, unicode) */
1421    if (decoded_chars == NULL)
1422        goto fail;
1423    textiowrapper_set_decoded_chars(self, decoded_chars);
1424    if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1425        eof = 0;
1426
1427    if (self->telling) {
1428        /* At the snapshot point, len(dec_buffer) bytes before the read, the
1429         * next input to be decoded is dec_buffer + input_chunk.
1430         */
1431        PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1432        if (next_input == NULL)
1433            goto fail;
1434        assert (PyBytes_Check(next_input));
1435        Py_DECREF(dec_buffer);
1436        Py_CLEAR(self->snapshot);
1437        self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1438    }
1439    Py_DECREF(input_chunk);
1440
1441    return (eof == 0);
1442
1443  fail:
1444    Py_XDECREF(dec_buffer);
1445    Py_XDECREF(dec_flags);
1446    Py_XDECREF(input_chunk);
1447    return -1;
1448}
1449
1450static PyObject *
1451textiowrapper_read(textio *self, PyObject *args)
1452{
1453    Py_ssize_t n = -1;
1454    PyObject *result = NULL, *chunks = NULL;
1455
1456    CHECK_INITIALIZED(self);
1457
1458    if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
1459        return NULL;
1460
1461    CHECK_CLOSED(self);
1462
1463    if (self->decoder == NULL) {
1464        PyErr_SetString(PyExc_IOError, "not readable");
1465        return NULL;
1466    }
1467
1468    if (_textiowrapper_writeflush(self) < 0)
1469        return NULL;
1470
1471    if (n < 0) {
1472        /* Read everything */
1473        PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1474        PyObject *decoded, *final;
1475        if (bytes == NULL)
1476            goto fail;
1477        decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1478                                             bytes, Py_True, NULL);
1479        Py_DECREF(bytes);
1480        if (decoded == NULL)
1481            goto fail;
1482
1483        result = textiowrapper_get_decoded_chars(self, -1);
1484
1485        if (result == NULL) {
1486            Py_DECREF(decoded);
1487            return NULL;
1488        }
1489
1490        final = PyUnicode_Concat(result, decoded);
1491        Py_DECREF(result);
1492        Py_DECREF(decoded);
1493        if (final == NULL)
1494            goto fail;
1495
1496        Py_CLEAR(self->snapshot);
1497        return final;
1498    }
1499    else {
1500        int res = 1;
1501        Py_ssize_t remaining = n;
1502
1503        result = textiowrapper_get_decoded_chars(self, n);
1504        if (result == NULL)
1505            goto fail;
1506        remaining -= PyUnicode_GET_SIZE(result);
1507
1508        /* Keep reading chunks until we have n characters to return */
1509        while (remaining > 0) {
1510            res = textiowrapper_read_chunk(self);
1511            if (res < 0)
1512                goto fail;
1513            if (res == 0)  /* EOF */
1514                break;
1515            if (chunks == NULL) {
1516                chunks = PyList_New(0);
1517                if (chunks == NULL)
1518                    goto fail;
1519            }
1520            if (PyList_Append(chunks, result) < 0)
1521                goto fail;
1522            Py_DECREF(result);
1523            result = textiowrapper_get_decoded_chars(self, remaining);
1524            if (result == NULL)
1525                goto fail;
1526            remaining -= PyUnicode_GET_SIZE(result);
1527        }
1528        if (chunks != NULL) {
1529            if (result != NULL && PyList_Append(chunks, result) < 0)
1530                goto fail;
1531            Py_CLEAR(result);
1532            result = PyUnicode_Join(_PyIO_empty_str, chunks);
1533            if (result == NULL)
1534                goto fail;
1535            Py_CLEAR(chunks);
1536        }
1537        return result;
1538    }
1539  fail:
1540    Py_XDECREF(result);
1541    Py_XDECREF(chunks);
1542    return NULL;
1543}
1544
1545
1546/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1547   that is to the NUL character. Otherwise the function will produce
1548   incorrect results. */
1549static Py_UNICODE *
1550find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1551{
1552    Py_UNICODE *s = start;
1553    for (;;) {
1554        while (*s > ch)
1555            s++;
1556        if (*s == ch)
1557            return s;
1558        if (s == end)
1559            return NULL;
1560        s++;
1561    }
1562}
1563
1564Py_ssize_t
1565_PyIO_find_line_ending(
1566    int translated, int universal, PyObject *readnl,
1567    Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1568{
1569    Py_ssize_t len = end - start;
1570
1571    if (translated) {
1572        /* Newlines are already translated, only search for \n */
1573        Py_UNICODE *pos = find_control_char(start, end, '\n');
1574        if (pos != NULL)
1575            return pos - start + 1;
1576        else {
1577            *consumed = len;
1578            return -1;
1579        }
1580    }
1581    else if (universal) {
1582        /* Universal newline search. Find any of \r, \r\n, \n
1583         * The decoder ensures that \r\n are not split in two pieces
1584         */
1585        Py_UNICODE *s = start;
1586        for (;;) {
1587            Py_UNICODE ch;
1588            /* Fast path for non-control chars. The loop always ends
1589               since the Py_UNICODE storage is NUL-terminated. */
1590            while (*s > '\r')
1591                s++;
1592            if (s >= end) {
1593                *consumed = len;
1594                return -1;
1595            }
1596            ch = *s++;
1597            if (ch == '\n')
1598                return s - start;
1599            if (ch == '\r') {
1600                if (*s == '\n')
1601                    return s - start + 1;
1602                else
1603                    return s - start;
1604            }
1605        }
1606    }
1607    else {
1608        /* Non-universal mode. */
1609        Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1610        unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1611        if (readnl_len == 1) {
1612            Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1613            if (pos != NULL)
1614                return pos - start + 1;
1615            *consumed = len;
1616            return -1;
1617        }
1618        else {
1619            Py_UNICODE *s = start;
1620            Py_UNICODE *e = end - readnl_len + 1;
1621            Py_UNICODE *pos;
1622            if (e < s)
1623                e = s;
1624            while (s < e) {
1625                Py_ssize_t i;
1626                Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1627                if (pos == NULL || pos >= e)
1628                    break;
1629                for (i = 1; i < readnl_len; i++) {
1630                    if (pos[i] != nl[i])
1631                        break;
1632                }
1633                if (i == readnl_len)
1634                    return pos - start + readnl_len;
1635                s = pos + 1;
1636            }
1637            pos = find_control_char(e, end, nl[0]);
1638            if (pos == NULL)
1639                *consumed = len;
1640            else
1641                *consumed = pos - start;
1642            return -1;
1643        }
1644    }
1645}
1646
1647static PyObject *
1648_textiowrapper_readline(textio *self, Py_ssize_t limit)
1649{
1650    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1651    Py_ssize_t start, endpos, chunked, offset_to_buffer;
1652    int res;
1653
1654    CHECK_CLOSED(self);
1655
1656    if (_textiowrapper_writeflush(self) < 0)
1657        return NULL;
1658
1659    chunked = 0;
1660
1661    while (1) {
1662        Py_UNICODE *ptr;
1663        Py_ssize_t line_len;
1664        Py_ssize_t consumed = 0;
1665
1666        /* First, get some data if necessary */
1667        res = 1;
1668        while (!self->decoded_chars ||
1669               !PyUnicode_GET_SIZE(self->decoded_chars)) {
1670            res = textiowrapper_read_chunk(self);
1671            if (res < 0)
1672                goto error;
1673            if (res == 0)
1674                break;
1675        }
1676        if (res == 0) {
1677            /* end of file */
1678            textiowrapper_set_decoded_chars(self, NULL);
1679            Py_CLEAR(self->snapshot);
1680            start = endpos = offset_to_buffer = 0;
1681            break;
1682        }
1683
1684        if (remaining == NULL) {
1685            line = self->decoded_chars;
1686            start = self->decoded_chars_used;
1687            offset_to_buffer = 0;
1688            Py_INCREF(line);
1689        }
1690        else {
1691            assert(self->decoded_chars_used == 0);
1692            line = PyUnicode_Concat(remaining, self->decoded_chars);
1693            start = 0;
1694            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1695            Py_CLEAR(remaining);
1696            if (line == NULL)
1697                goto error;
1698        }
1699
1700        ptr = PyUnicode_AS_UNICODE(line);
1701        line_len = PyUnicode_GET_SIZE(line);
1702
1703        endpos = _PyIO_find_line_ending(
1704            self->readtranslate, self->readuniversal, self->readnl,
1705            ptr + start, ptr + line_len, &consumed);
1706        if (endpos >= 0) {
1707            endpos += start;
1708            if (limit >= 0 && (endpos - start) + chunked >= limit)
1709                endpos = start + limit - chunked;
1710            break;
1711        }
1712
1713        /* We can put aside up to `endpos` */
1714        endpos = consumed + start;
1715        if (limit >= 0 && (endpos - start) + chunked >= limit) {
1716            /* Didn't find line ending, but reached length limit */
1717            endpos = start + limit - chunked;
1718            break;
1719        }
1720
1721        if (endpos > start) {
1722            /* No line ending seen yet - put aside current data */
1723            PyObject *s;
1724            if (chunks == NULL) {
1725                chunks = PyList_New(0);
1726                if (chunks == NULL)
1727                    goto error;
1728            }
1729            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1730            if (s == NULL)
1731                goto error;
1732            if (PyList_Append(chunks, s) < 0) {
1733                Py_DECREF(s);
1734                goto error;
1735            }
1736            chunked += PyUnicode_GET_SIZE(s);
1737            Py_DECREF(s);
1738        }
1739        /* There may be some remaining bytes we'll have to prepend to the
1740           next chunk of data */
1741        if (endpos < line_len) {
1742            remaining = PyUnicode_FromUnicode(
1743                    ptr + endpos, line_len - endpos);
1744            if (remaining == NULL)
1745                goto error;
1746        }
1747        Py_CLEAR(line);
1748        /* We have consumed the buffer */
1749        textiowrapper_set_decoded_chars(self, NULL);
1750    }
1751
1752    if (line != NULL) {
1753        /* Our line ends in the current buffer */
1754        self->decoded_chars_used = endpos - offset_to_buffer;
1755        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1756            if (start == 0 && Py_REFCNT(line) == 1) {
1757                if (PyUnicode_Resize(&line, endpos) < 0)
1758                    goto error;
1759            }
1760            else {
1761                PyObject *s = PyUnicode_FromUnicode(
1762                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
1763                Py_CLEAR(line);
1764                if (s == NULL)
1765                    goto error;
1766                line = s;
1767            }
1768        }
1769    }
1770    if (remaining != NULL) {
1771        if (chunks == NULL) {
1772            chunks = PyList_New(0);
1773            if (chunks == NULL)
1774                goto error;
1775        }
1776        if (PyList_Append(chunks, remaining) < 0)
1777            goto error;
1778        Py_CLEAR(remaining);
1779    }
1780    if (chunks != NULL) {
1781        if (line != NULL && PyList_Append(chunks, line) < 0)
1782            goto error;
1783        Py_CLEAR(line);
1784        line = PyUnicode_Join(_PyIO_empty_str, chunks);
1785        if (line == NULL)
1786            goto error;
1787        Py_DECREF(chunks);
1788    }
1789    if (line == NULL)
1790        line = PyUnicode_FromStringAndSize(NULL, 0);
1791
1792    return line;
1793
1794  error:
1795    Py_XDECREF(chunks);
1796    Py_XDECREF(remaining);
1797    Py_XDECREF(line);
1798    return NULL;
1799}
1800
1801static PyObject *
1802textiowrapper_readline(textio *self, PyObject *args)
1803{
1804    PyObject *limitobj = NULL;
1805    Py_ssize_t limit = -1;
1806
1807    CHECK_INITIALIZED(self);
1808    if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1809        return NULL;
1810    }
1811    if (limitobj) {
1812        if (!PyNumber_Check(limitobj)) {
1813            PyErr_Format(PyExc_TypeError,
1814                         "integer argument expected, got '%.200s'",
1815                         Py_TYPE(limitobj)->tp_name);
1816            return NULL;
1817        }
1818        limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1819        if (limit == -1 && PyErr_Occurred())
1820            return NULL;
1821    }
1822    return _textiowrapper_readline(self, limit);
1823}
1824
1825/* Seek and Tell */
1826
1827typedef struct {
1828    Py_off_t start_pos;
1829    int dec_flags;
1830    int bytes_to_feed;
1831    int chars_to_skip;
1832    char need_eof;
1833} cookie_type;
1834
1835/*
1836   To speed up cookie packing/unpacking, we store the fields in a temporary
1837   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1838   The following macros define at which offsets in the intermediary byte
1839   string the various CookieStruct fields will be stored.
1840 */
1841
1842#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1843
1844#if defined(WORDS_BIGENDIAN)
1845
1846# define IS_LITTLE_ENDIAN   0
1847
1848/* We want the least significant byte of start_pos to also be the least
1849   significant byte of the cookie, which means that in big-endian mode we
1850   must copy the fields in reverse order. */
1851
1852# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
1853# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
1854# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
1855# define OFF_CHARS_TO_SKIP  (sizeof(char))
1856# define OFF_NEED_EOF       0
1857
1858#else
1859
1860# define IS_LITTLE_ENDIAN   1
1861
1862/* Little-endian mode: the least significant byte of start_pos will
1863   naturally end up the least significant byte of the cookie. */
1864
1865# define OFF_START_POS      0
1866# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
1867# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
1868# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
1869# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
1870
1871#endif
1872
1873static int
1874textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1875{
1876    unsigned char buffer[COOKIE_BUF_LEN];
1877    PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1878    if (cookieLong == NULL)
1879        return -1;
1880
1881    if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1882                            IS_LITTLE_ENDIAN, 0) < 0) {
1883        Py_DECREF(cookieLong);
1884        return -1;
1885    }
1886    Py_DECREF(cookieLong);
1887
1888    memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1889    memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1890    memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1891    memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1892    memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1893
1894    return 0;
1895}
1896
1897static PyObject *
1898textiowrapper_build_cookie(cookie_type *cookie)
1899{
1900    unsigned char buffer[COOKIE_BUF_LEN];
1901
1902    memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1903    memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1904    memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1905    memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1906    memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1907
1908    return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1909}
1910#undef IS_LITTLE_ENDIAN
1911
1912static int
1913_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1914{
1915    PyObject *res;
1916    /* When seeking to the start of the stream, we call decoder.reset()
1917       rather than decoder.getstate().
1918       This is for a few decoders such as utf-16 for which the state value
1919       at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1920       utf-16, that we are expecting a BOM).
1921    */
1922    if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1923        res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1924    else
1925        res = PyObject_CallMethod(self->decoder, "setstate",
1926                                  "((si))", "", cookie->dec_flags);
1927    if (res == NULL)
1928        return -1;
1929    Py_DECREF(res);
1930    return 0;
1931}
1932
1933static int
1934_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1935{
1936    PyObject *res;
1937    /* Same as _textiowrapper_decoder_setstate() above. */
1938    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1939        res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1940        self->encoding_start_of_stream = 1;
1941    }
1942    else {
1943        res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1944                                         _PyIO_zero, NULL);
1945        self->encoding_start_of_stream = 0;
1946    }
1947    if (res == NULL)
1948        return -1;
1949    Py_DECREF(res);
1950    return 0;
1951}
1952
1953static PyObject *
1954textiowrapper_seek(textio *self, PyObject *args)
1955{
1956    PyObject *cookieObj, *posobj;
1957    cookie_type cookie;
1958    int whence = 0;
1959    PyObject *res;
1960    int cmp;
1961
1962    CHECK_INITIALIZED(self);
1963
1964    if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1965        return NULL;
1966    CHECK_CLOSED(self);
1967
1968    Py_INCREF(cookieObj);
1969
1970    if (!self->seekable) {
1971        PyErr_SetString(PyExc_IOError,
1972                        "underlying stream is not seekable");
1973        goto fail;
1974    }
1975
1976    if (whence == 1) {
1977        /* seek relative to current position */
1978        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1979        if (cmp < 0)
1980            goto fail;
1981
1982        if (cmp == 0) {
1983            PyErr_SetString(PyExc_IOError,
1984                            "can't do nonzero cur-relative seeks");
1985            goto fail;
1986        }
1987
1988        /* Seeking to the current position should attempt to
1989         * sync the underlying buffer with the current position.
1990         */
1991        Py_DECREF(cookieObj);
1992        cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1993        if (cookieObj == NULL)
1994            goto fail;
1995    }
1996    else if (whence == 2) {
1997        /* seek relative to end of file */
1998
1999        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2000        if (cmp < 0)
2001            goto fail;
2002
2003        if (cmp == 0) {
2004            PyErr_SetString(PyExc_IOError,
2005                            "can't do nonzero end-relative seeks");
2006            goto fail;
2007        }
2008
2009        res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2010        if (res == NULL)
2011            goto fail;
2012        Py_DECREF(res);
2013
2014        textiowrapper_set_decoded_chars(self, NULL);
2015        Py_CLEAR(self->snapshot);
2016        if (self->decoder) {
2017            res = PyObject_CallMethod(self->decoder, "reset", NULL);
2018            if (res == NULL)
2019                goto fail;
2020            Py_DECREF(res);
2021        }
2022
2023        res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2024        Py_XDECREF(cookieObj);
2025        return res;
2026    }
2027    else if (whence != 0) {
2028        PyErr_Format(PyExc_ValueError,
2029                     "invalid whence (%d, should be 0, 1 or 2)", whence);
2030        goto fail;
2031    }
2032
2033    cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2034    if (cmp < 0)
2035        goto fail;
2036
2037    if (cmp == 1) {
2038        PyObject *repr = PyObject_Repr(cookieObj);
2039        if (repr != NULL) {
2040            PyErr_Format(PyExc_ValueError,
2041                         "negative seek position %s",
2042                         PyString_AS_STRING(repr));
2043            Py_DECREF(repr);
2044        }
2045        goto fail;
2046    }
2047
2048    res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2049    if (res == NULL)
2050        goto fail;
2051    Py_DECREF(res);
2052
2053    /* The strategy of seek() is to go back to the safe start point
2054     * and replay the effect of read(chars_to_skip) from there.
2055     */
2056    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2057        goto fail;
2058
2059    /* Seek back to the safe start point. */
2060    posobj = PyLong_FromOff_t(cookie.start_pos);
2061    if (posobj == NULL)
2062        goto fail;
2063    res = PyObject_CallMethodObjArgs(self->buffer,
2064                                     _PyIO_str_seek, posobj, NULL);
2065    Py_DECREF(posobj);
2066    if (res == NULL)
2067        goto fail;
2068    Py_DECREF(res);
2069
2070    textiowrapper_set_decoded_chars(self, NULL);
2071    Py_CLEAR(self->snapshot);
2072
2073    /* Restore the decoder to its state from the safe start point. */
2074    if (self->decoder) {
2075        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2076            goto fail;
2077    }
2078
2079    if (cookie.chars_to_skip) {
2080        /* Just like _read_chunk, feed the decoder and save a snapshot. */
2081        PyObject *input_chunk = PyObject_CallMethod(
2082            self->buffer, "read", "i", cookie.bytes_to_feed);
2083        PyObject *decoded;
2084
2085        if (input_chunk == NULL)
2086            goto fail;
2087
2088        assert (PyBytes_Check(input_chunk));
2089
2090        self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2091        if (self->snapshot == NULL) {
2092            Py_DECREF(input_chunk);
2093            goto fail;
2094        }
2095
2096        decoded = PyObject_CallMethod(self->decoder, "decode",
2097                                      "Oi", input_chunk, (int)cookie.need_eof);
2098
2099        if (decoded == NULL)
2100            goto fail;
2101
2102        textiowrapper_set_decoded_chars(self, decoded);
2103
2104        /* Skip chars_to_skip of the decoded characters. */
2105        if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2106            PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2107            goto fail;
2108        }
2109        self->decoded_chars_used = cookie.chars_to_skip;
2110    }
2111    else {
2112        self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2113        if (self->snapshot == NULL)
2114            goto fail;
2115    }
2116
2117    /* Finally, reset the encoder (merely useful for proper BOM handling) */
2118    if (self->encoder) {
2119        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2120            goto fail;
2121    }
2122    return cookieObj;
2123  fail:
2124    Py_XDECREF(cookieObj);
2125    return NULL;
2126
2127}
2128
2129static PyObject *
2130textiowrapper_tell(textio *self, PyObject *args)
2131{
2132    PyObject *res;
2133    PyObject *posobj = NULL;
2134    cookie_type cookie = {0,0,0,0,0};
2135    PyObject *next_input;
2136    Py_ssize_t chars_to_skip, chars_decoded;
2137    PyObject *saved_state = NULL;
2138    char *input, *input_end;
2139
2140    CHECK_INITIALIZED(self);
2141    CHECK_CLOSED(self);
2142
2143    if (!self->seekable) {
2144        PyErr_SetString(PyExc_IOError,
2145                        "underlying stream is not seekable");
2146        goto fail;
2147    }
2148    if (!self->telling) {
2149        PyErr_SetString(PyExc_IOError,
2150                        "telling position disabled by next() call");
2151        goto fail;
2152    }
2153
2154    if (_textiowrapper_writeflush(self) < 0)
2155        return NULL;
2156    res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2157    if (res == NULL)
2158        goto fail;
2159    Py_DECREF(res);
2160
2161    posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2162    if (posobj == NULL)
2163        goto fail;
2164
2165    if (self->decoder == NULL || self->snapshot == NULL) {
2166        assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2167        return posobj;
2168    }
2169
2170#if defined(HAVE_LARGEFILE_SUPPORT)
2171    cookie.start_pos = PyLong_AsLongLong(posobj);
2172#else
2173    cookie.start_pos = PyLong_AsLong(posobj);
2174#endif
2175    if (PyErr_Occurred())
2176        goto fail;
2177
2178    /* Skip backward to the snapshot point (see _read_chunk). */
2179    if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2180        goto fail;
2181
2182    assert (PyBytes_Check(next_input));
2183
2184    cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2185
2186    /* How many decoded characters have been used up since the snapshot? */
2187    if (self->decoded_chars_used == 0)  {
2188        /* We haven't moved from the snapshot point. */
2189        Py_DECREF(posobj);
2190        return textiowrapper_build_cookie(&cookie);
2191    }
2192
2193    chars_to_skip = self->decoded_chars_used;
2194
2195    /* Starting from the snapshot position, we will walk the decoder
2196     * forward until it gives us enough decoded characters.
2197     */
2198    saved_state = PyObject_CallMethodObjArgs(self->decoder,
2199                                             _PyIO_str_getstate, NULL);
2200    if (saved_state == NULL)
2201        goto fail;
2202
2203    /* Note our initial start point. */
2204    if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2205        goto fail;
2206
2207    /* Feed the decoder one byte at a time.  As we go, note the
2208     * nearest "safe start point" before the current location
2209     * (a point where the decoder has nothing buffered, so seek()
2210     * can safely start from there and advance to this location).
2211     */
2212    chars_decoded = 0;
2213    input = PyBytes_AS_STRING(next_input);
2214    input_end = input + PyBytes_GET_SIZE(next_input);
2215    while (input < input_end) {
2216        PyObject *state;
2217        char *dec_buffer;
2218        Py_ssize_t dec_buffer_len;
2219        int dec_flags;
2220
2221        PyObject *decoded = PyObject_CallMethod(
2222            self->decoder, "decode", "s#", input, 1);
2223        if (decoded == NULL)
2224            goto fail;
2225        assert (PyUnicode_Check(decoded));
2226        chars_decoded += PyUnicode_GET_SIZE(decoded);
2227        Py_DECREF(decoded);
2228
2229        cookie.bytes_to_feed += 1;
2230
2231        state = PyObject_CallMethodObjArgs(self->decoder,
2232                                           _PyIO_str_getstate, NULL);
2233        if (state == NULL)
2234            goto fail;
2235        if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2236            Py_DECREF(state);
2237            goto fail;
2238        }
2239        Py_DECREF(state);
2240
2241        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2242            /* Decoder buffer is empty, so this is a safe start point. */
2243            cookie.start_pos += cookie.bytes_to_feed;
2244            chars_to_skip -= chars_decoded;
2245            cookie.dec_flags = dec_flags;
2246            cookie.bytes_to_feed = 0;
2247            chars_decoded = 0;
2248        }
2249        if (chars_decoded >= chars_to_skip)
2250            break;
2251        input++;
2252    }
2253    if (input == input_end) {
2254        /* We didn't get enough decoded data; signal EOF to get more. */
2255        PyObject *decoded = PyObject_CallMethod(
2256            self->decoder, "decode", "si", "", /* final = */ 1);
2257        if (decoded == NULL)
2258            goto fail;
2259        assert (PyUnicode_Check(decoded));
2260        chars_decoded += PyUnicode_GET_SIZE(decoded);
2261        Py_DECREF(decoded);
2262        cookie.need_eof = 1;
2263
2264        if (chars_decoded < chars_to_skip) {
2265            PyErr_SetString(PyExc_IOError,
2266                            "can't reconstruct logical file position");
2267            goto fail;
2268        }
2269    }
2270
2271    /* finally */
2272    Py_XDECREF(posobj);
2273    res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2274    Py_DECREF(saved_state);
2275    if (res == NULL)
2276        return NULL;
2277    Py_DECREF(res);
2278
2279    /* The returned cookie corresponds to the last safe start point. */
2280    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2281    return textiowrapper_build_cookie(&cookie);
2282
2283  fail:
2284    Py_XDECREF(posobj);
2285    if (saved_state) {
2286        PyObject *type, *value, *traceback;
2287        PyErr_Fetch(&type, &value, &traceback);
2288
2289        res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2290        Py_DECREF(saved_state);
2291        if (res == NULL)
2292            return NULL;
2293        Py_DECREF(res);
2294
2295        PyErr_Restore(type, value, traceback);
2296    }
2297    return NULL;
2298}
2299
2300static PyObject *
2301textiowrapper_truncate(textio *self, PyObject *args)
2302{
2303    PyObject *pos = Py_None;
2304    PyObject *res;
2305
2306    CHECK_INITIALIZED(self)
2307    if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2308        return NULL;
2309    }
2310
2311    res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2312    if (res == NULL)
2313        return NULL;
2314    Py_DECREF(res);
2315
2316    return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2317}
2318
2319static PyObject *
2320textiowrapper_repr(textio *self)
2321{
2322    PyObject *nameobj, *res;
2323    PyObject *namerepr = NULL, *encrepr = NULL;
2324
2325    CHECK_INITIALIZED(self);
2326
2327    nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2328    if (nameobj == NULL) {
2329        if (PyErr_ExceptionMatches(PyExc_AttributeError))
2330            PyErr_Clear();
2331        else
2332            goto error;
2333        encrepr = PyObject_Repr(self->encoding);
2334        res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2335                                   PyString_AS_STRING(encrepr));
2336    }
2337    else {
2338        encrepr = PyObject_Repr(self->encoding);
2339        namerepr = PyObject_Repr(nameobj);
2340        res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2341                                   PyString_AS_STRING(namerepr),
2342                                   PyString_AS_STRING(encrepr));
2343        Py_DECREF(nameobj);
2344    }
2345    Py_XDECREF(namerepr);
2346    Py_XDECREF(encrepr);
2347    return res;
2348
2349error:
2350    Py_XDECREF(namerepr);
2351    Py_XDECREF(encrepr);
2352    return NULL;
2353}
2354
2355
2356/* Inquiries */
2357
2358static PyObject *
2359textiowrapper_fileno(textio *self, PyObject *args)
2360{
2361    CHECK_INITIALIZED(self);
2362    return PyObject_CallMethod(self->buffer, "fileno", NULL);
2363}
2364
2365static PyObject *
2366textiowrapper_seekable(textio *self, PyObject *args)
2367{
2368    CHECK_INITIALIZED(self);
2369    return PyObject_CallMethod(self->buffer, "seekable", NULL);
2370}
2371
2372static PyObject *
2373textiowrapper_readable(textio *self, PyObject *args)
2374{
2375    CHECK_INITIALIZED(self);
2376    return PyObject_CallMethod(self->buffer, "readable", NULL);
2377}
2378
2379static PyObject *
2380textiowrapper_writable(textio *self, PyObject *args)
2381{
2382    CHECK_INITIALIZED(self);
2383    return PyObject_CallMethod(self->buffer, "writable", NULL);
2384}
2385
2386static PyObject *
2387textiowrapper_isatty(textio *self, PyObject *args)
2388{
2389    CHECK_INITIALIZED(self);
2390    return PyObject_CallMethod(self->buffer, "isatty", NULL);
2391}
2392
2393static PyObject *
2394textiowrapper_flush(textio *self, PyObject *args)
2395{
2396    CHECK_INITIALIZED(self);
2397    CHECK_CLOSED(self);
2398    self->telling = self->seekable;
2399    if (_textiowrapper_writeflush(self) < 0)
2400        return NULL;
2401    return PyObject_CallMethod(self->buffer, "flush", NULL);
2402}
2403
2404static PyObject *
2405textiowrapper_close(textio *self, PyObject *args)
2406{
2407    PyObject *res;
2408    int r;
2409    CHECK_INITIALIZED(self);
2410
2411    res = textiowrapper_closed_get(self, NULL);
2412    if (res == NULL)
2413        return NULL;
2414    r = PyObject_IsTrue(res);
2415    Py_DECREF(res);
2416    if (r < 0)
2417        return NULL;
2418
2419    if (r > 0) {
2420        Py_RETURN_NONE; /* stream already closed */
2421    }
2422    else {
2423        res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2424        if (res == NULL) {
2425            return NULL;
2426        }
2427        else
2428            Py_DECREF(res);
2429
2430        return PyObject_CallMethod(self->buffer, "close", NULL);
2431    }
2432}
2433
2434static PyObject *
2435textiowrapper_iternext(textio *self)
2436{
2437    PyObject *line;
2438
2439    CHECK_INITIALIZED(self);
2440
2441    self->telling = 0;
2442    if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2443        /* Skip method call overhead for speed */
2444        line = _textiowrapper_readline(self, -1);
2445    }
2446    else {
2447        line = PyObject_CallMethodObjArgs((PyObject *)self,
2448                                           _PyIO_str_readline, NULL);
2449        if (line && !PyUnicode_Check(line)) {
2450            PyErr_Format(PyExc_IOError,
2451                         "readline() should have returned an str object, "
2452                         "not '%.200s'", Py_TYPE(line)->tp_name);
2453            Py_DECREF(line);
2454            return NULL;
2455        }
2456    }
2457
2458    if (line == NULL)
2459        return NULL;
2460
2461    if (PyUnicode_GET_SIZE(line) == 0) {
2462        /* Reached EOF or would have blocked */
2463        Py_DECREF(line);
2464        Py_CLEAR(self->snapshot);
2465        self->telling = self->seekable;
2466        return NULL;
2467    }
2468
2469    return line;
2470}
2471
2472static PyObject *
2473textiowrapper_name_get(textio *self, void *context)
2474{
2475    CHECK_INITIALIZED(self);
2476    return PyObject_GetAttrString(self->buffer, "name");
2477}
2478
2479static PyObject *
2480textiowrapper_closed_get(textio *self, void *context)
2481{
2482    CHECK_INITIALIZED(self);
2483    return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2484}
2485
2486static PyObject *
2487textiowrapper_newlines_get(textio *self, void *context)
2488{
2489    PyObject *res;
2490    CHECK_INITIALIZED(self);
2491    if (self->decoder == NULL)
2492        Py_RETURN_NONE;
2493    res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2494    if (res == NULL) {
2495        if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2496            PyErr_Clear();
2497            Py_RETURN_NONE;
2498        }
2499        else {
2500            return NULL;
2501        }
2502    }
2503    return res;
2504}
2505
2506static PyObject *
2507textiowrapper_errors_get(textio *self, void *context)
2508{
2509    CHECK_INITIALIZED(self);
2510    Py_INCREF(self->errors);
2511    return self->errors;
2512}
2513
2514static PyObject *
2515textiowrapper_chunk_size_get(textio *self, void *context)
2516{
2517    CHECK_INITIALIZED(self);
2518    return PyLong_FromSsize_t(self->chunk_size);
2519}
2520
2521static int
2522textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2523{
2524    Py_ssize_t n;
2525    CHECK_INITIALIZED_INT(self);
2526    n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2527    if (n == -1 && PyErr_Occurred())
2528        return -1;
2529    if (n <= 0) {
2530        PyErr_SetString(PyExc_ValueError,
2531                        "a strictly positive integer is required");
2532        return -1;
2533    }
2534    self->chunk_size = n;
2535    return 0;
2536}
2537
2538static PyMethodDef textiowrapper_methods[] = {
2539    {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2540    {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2541    {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2542    {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2543    {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2544    {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2545
2546    {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2547    {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2548    {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2549    {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2550    {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2551
2552    {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2553    {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2554    {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2555    {NULL, NULL}
2556};
2557
2558static PyMemberDef textiowrapper_members[] = {
2559    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2560    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2561    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2562    {NULL}
2563};
2564
2565static PyGetSetDef textiowrapper_getset[] = {
2566    {"name", (getter)textiowrapper_name_get, NULL, NULL},
2567    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2568/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2569*/
2570    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2571    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2572    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2573                    (setter)textiowrapper_chunk_size_set, NULL},
2574    {NULL}
2575};
2576
/* Type object for _io.TextIOWrapper.

   The initializer is positional, so the order of the entries below must
   follow the layout of PyTypeObject exactly.  The type is subclassable
   and takes part in garbage collection (see tp_flags), supports weak
   references and a per-instance __dict__ (see the two offsetof() slots),
   and is its own iterator through tp_iternext.  tp_base is left 0 here. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
2618