1/*
2 * multibytecodec.c: Common Multibyte Codec Implementation
3 *
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
5 */
6
7#define PY_SSIZE_T_CLEAN
8#include "Python.h"
9#include "structmember.h"
10#include "multibytecodec.h"
11
12typedef struct {
13    const Py_UNICODE    *inbuf, *inbuf_top, *inbuf_end;
14    unsigned char       *outbuf, *outbuf_end;
15    PyObject            *excobj, *outobj;
16} MultibyteEncodeBuffer;
17
18typedef struct {
19    const unsigned char *inbuf, *inbuf_top, *inbuf_end;
20    Py_UNICODE          *outbuf, *outbuf_end;
21    PyObject            *excobj, *outobj;
22} MultibyteDecodeBuffer;
23
/* Docstring of the "encode" entry in multibytecodec_methods. */
PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
"I.encode(unicode[, errors]) -> (string, length consumed)\n\
\n\
Return an encoded string version of `unicode'. errors may be given to\n\
set a different error handling scheme. Default is 'strict' meaning that\n\
encoding errors raise a UnicodeEncodeError. Other possible values are\n\
'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
registered with codecs.register_error that can handle UnicodeEncodeErrors.");
32
33PyDoc_STRVAR(MultibyteCodec_Decode__doc__,
34"I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\
35\n\
36Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\
37to set a different error handling scheme. Default is 'strict' meaning\n\
38that encoding errors raise a UnicodeDecodeError. Other possible values\n\
39are 'ignore' and 'replace' as well as any other name registered with\n\
40codecs.register_error that is able to handle UnicodeDecodeErrors.");
41
/* Keyword-name tables passed to PyArg_ParseTupleAndKeywords(). */
/* used by MultibyteCodec_Encode() and MultibyteCodec_Decode() */
static char *codeckwarglist[] = {"input", "errors", NULL};
/* used by mbiencoder_new() */
static char *incnewkwarglist[] = {"errors", NULL};
/* used by mbiencoder_encode() */
static char *incrementalkwarglist[] = {"input", "final", NULL};
/* presumably for the stream reader/writer constructors -- TODO confirm */
static char *streamkwarglist[] = {"stream", "errors", NULL};
46
/* Forward declaration: multibytecodec_encerror() calls this to encode the
 * replacement text returned by a custom error handler. */
static PyObject *multibytecodec_encode(MultibyteCodec *,
                MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
                PyObject *, int);
50
/* Parenthesized so the shift cannot be regrouped by operators that bind
 * tighter than << at the point of use. */
#define MBENC_RESET     (MBENC_MAX<<1) /* reset after an encoding session */
52
53static PyObject *
54make_tuple(PyObject *object, Py_ssize_t len)
55{
56    PyObject *v, *w;
57
58    if (object == NULL)
59        return NULL;
60
61    v = PyTuple_New(2);
62    if (v == NULL) {
63        Py_DECREF(object);
64        return NULL;
65    }
66    PyTuple_SET_ITEM(v, 0, object);
67
68    w = PyInt_FromSsize_t(len);
69    if (w == NULL) {
70        Py_DECREF(v);
71        return NULL;
72    }
73    PyTuple_SET_ITEM(v, 1, w);
74
75    return v;
76}
77
78static PyObject *
79internal_error_callback(const char *errors)
80{
81    if (errors == NULL || strcmp(errors, "strict") == 0)
82        return ERROR_STRICT;
83    else if (strcmp(errors, "ignore") == 0)
84        return ERROR_IGNORE;
85    else if (strcmp(errors, "replace") == 0)
86        return ERROR_REPLACE;
87    else
88        return PyString_FromString(errors);
89}
90
91static PyObject *
92call_error_callback(PyObject *errors, PyObject *exc)
93{
94    PyObject *args, *cb, *r;
95
96    assert(PyString_Check(errors));
97    cb = PyCodec_LookupError(PyString_AS_STRING(errors));
98    if (cb == NULL)
99        return NULL;
100
101    args = PyTuple_New(1);
102    if (args == NULL) {
103        Py_DECREF(cb);
104        return NULL;
105    }
106
107    PyTuple_SET_ITEM(args, 0, exc);
108    Py_INCREF(exc);
109
110    r = PyObject_CallObject(cb, args);
111    Py_DECREF(args);
112    Py_DECREF(cb);
113    return r;
114}
115
116static PyObject *
117codecctx_errors_get(MultibyteStatefulCodecContext *self)
118{
119    const char *errors;
120
121    if (self->errors == ERROR_STRICT)
122        errors = "strict";
123    else if (self->errors == ERROR_IGNORE)
124        errors = "ignore";
125    else if (self->errors == ERROR_REPLACE)
126        errors = "replace";
127    else {
128        Py_INCREF(self->errors);
129        return self->errors;
130    }
131
132    return PyString_FromString(errors);
133}
134
135static int
136codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
137                    void *closure)
138{
139    PyObject *cb;
140
141    if (!PyString_Check(value)) {
142        PyErr_SetString(PyExc_TypeError, "errors must be a string");
143        return -1;
144    }
145
146    cb = internal_error_callback(PyString_AS_STRING(value));
147    if (cb == NULL)
148        return -1;
149
150    ERROR_DECREF(self->errors);
151    self->errors = cb;
152    return 0;
153}
154
155/* This getset handlers list is used by all the stateful codec objects */
156static PyGetSetDef codecctx_getsets[] = {
157    {"errors",          (getter)codecctx_errors_get,
158                    (setter)codecctx_errors_set,
159                    PyDoc_STR("how to treat errors")},
160    {NULL,}
161};
162
163static int
164expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
165{
166    Py_ssize_t orgpos, orgsize, incsize;
167
168    orgpos = (Py_ssize_t)((char *)buf->outbuf -
169                            PyString_AS_STRING(buf->outobj));
170    orgsize = PyString_GET_SIZE(buf->outobj);
171    incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
172
173    if (orgsize > PY_SSIZE_T_MAX - incsize) {
174        PyErr_NoMemory();
175        return -1;
176    }
177
178    if (_PyString_Resize(&buf->outobj, orgsize + incsize) == -1)
179        return -1;
180
181    buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos;
182    buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj)
183        + PyString_GET_SIZE(buf->outobj);
184
185    return 0;
186}
/* Make sure at least `s` output bytes are available (a negative `s` always
 * forces a growth step); jumps to the caller's `errorexit` label if the
 * buffer cannot be expanded. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&         \
        expand_encodebuffer(buf, s) == -1)                              \
        goto errorexit;                                                 \
} while(0)
192
193static int
194expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
195{
196    Py_ssize_t orgpos, orgsize;
197
198    orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
199    orgsize = PyUnicode_GET_SIZE(buf->outobj);
200    if (PyUnicode_Resize(&buf->outobj, orgsize + (
201        esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
202        return -1;
203
204    buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
205    buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
206                      + PyUnicode_GET_SIZE(buf->outobj);
207
208    return 0;
209}
/* Make sure at least `s` output characters are available (a negative `s`
 * always forces a growth step); jumps to the caller's `errorexit` label if
 * the buffer cannot be expanded. */
#define REQUIRE_DECODEBUFFER(buf, s) do {                               \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&         \
        expand_decodebuffer(buf, s) == -1)                              \
        goto errorexit;                                                 \
} while(0)
215
216
217/**
218 * MultibyteCodec object
219 */
220
221static int
222multibytecodec_encerror(MultibyteCodec *codec,
223                        MultibyteCodec_State *state,
224                        MultibyteEncodeBuffer *buf,
225                        PyObject *errors, Py_ssize_t e)
226{
227    PyObject *retobj = NULL, *retstr = NULL, *tobj;
228    Py_ssize_t retstrsize, newpos;
229    Py_ssize_t esize, start, end;
230    const char *reason;
231
232    if (e > 0) {
233        reason = "illegal multibyte sequence";
234        esize = e;
235    }
236    else {
237        switch (e) {
238        case MBERR_TOOSMALL:
239            REQUIRE_ENCODEBUFFER(buf, -1);
240            return 0; /* retry it */
241        case MBERR_TOOFEW:
242            reason = "incomplete multibyte sequence";
243            esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
244            break;
245        case MBERR_INTERNAL:
246            PyErr_SetString(PyExc_RuntimeError,
247                            "internal codec error");
248            return -1;
249        default:
250            PyErr_SetString(PyExc_RuntimeError,
251                            "unknown runtime error");
252            return -1;
253        }
254    }
255
256    if (errors == ERROR_REPLACE) {
257        const Py_UNICODE replchar = '?', *inbuf = &replchar;
258        Py_ssize_t r;
259
260        for (;;) {
261            Py_ssize_t outleft;
262
263            outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
264            r = codec->encode(state, codec->config, &inbuf, 1,
265                              &buf->outbuf, outleft, 0);
266            if (r == MBERR_TOOSMALL) {
267                REQUIRE_ENCODEBUFFER(buf, -1);
268                continue;
269            }
270            else
271                break;
272        }
273
274        if (r != 0) {
275            REQUIRE_ENCODEBUFFER(buf, 1);
276            *buf->outbuf++ = '?';
277        }
278    }
279    if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
280        buf->inbuf += esize;
281        return 0;
282    }
283
284    start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
285    end = start + esize;
286
287    /* use cached exception object if available */
288    if (buf->excobj == NULL) {
289        buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,
290                        buf->inbuf_top,
291                        buf->inbuf_end - buf->inbuf_top,
292                        start, end, reason);
293        if (buf->excobj == NULL)
294            goto errorexit;
295    }
296    else
297        if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
298            PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
299            PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
300            goto errorexit;
301
302    if (errors == ERROR_STRICT) {
303        PyCodec_StrictErrors(buf->excobj);
304        goto errorexit;
305    }
306
307    retobj = call_error_callback(errors, buf->excobj);
308    if (retobj == NULL)
309        goto errorexit;
310
311    if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
312        !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
313        !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) ||
314          PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) {
315        PyErr_SetString(PyExc_TypeError,
316                        "encoding error handler must return "
317                        "(unicode, int) tuple");
318        goto errorexit;
319    }
320
321    {
322        const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
323
324        retstr = multibytecodec_encode(codec, state, &uraw,
325                        PyUnicode_GET_SIZE(tobj), ERROR_STRICT,
326                        MBENC_FLUSH);
327        if (retstr == NULL)
328            goto errorexit;
329    }
330
331    retstrsize = PyString_GET_SIZE(retstr);
332    if (retstrsize > 0) {
333        REQUIRE_ENCODEBUFFER(buf, retstrsize);
334        memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
335        buf->outbuf += retstrsize;
336    }
337
338    newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
339    if (newpos < 0 && !PyErr_Occurred())
340        newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
341    if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
342        PyErr_Clear();
343        PyErr_Format(PyExc_IndexError,
344                     "position %zd from error handler out of bounds",
345                     newpos);
346        goto errorexit;
347    }
348    buf->inbuf = buf->inbuf_top + newpos;
349
350    Py_DECREF(retobj);
351    Py_DECREF(retstr);
352    return 0;
353
354errorexit:
355    Py_XDECREF(retobj);
356    Py_XDECREF(retstr);
357    return -1;
358}
359
360static int
361multibytecodec_decerror(MultibyteCodec *codec,
362                        MultibyteCodec_State *state,
363                        MultibyteDecodeBuffer *buf,
364                        PyObject *errors, Py_ssize_t e)
365{
366    PyObject *retobj = NULL, *retuni = NULL;
367    Py_ssize_t retunisize, newpos;
368    const char *reason;
369    Py_ssize_t esize, start, end;
370
371    if (e > 0) {
372        reason = "illegal multibyte sequence";
373        esize = e;
374    }
375    else {
376        switch (e) {
377        case MBERR_TOOSMALL:
378            REQUIRE_DECODEBUFFER(buf, -1);
379            return 0; /* retry it */
380        case MBERR_TOOFEW:
381            reason = "incomplete multibyte sequence";
382            esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
383            break;
384        case MBERR_INTERNAL:
385            PyErr_SetString(PyExc_RuntimeError,
386                            "internal codec error");
387            return -1;
388        default:
389            PyErr_SetString(PyExc_RuntimeError,
390                            "unknown runtime error");
391            return -1;
392        }
393    }
394
395    if (errors == ERROR_REPLACE) {
396        REQUIRE_DECODEBUFFER(buf, 1);
397        *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
398    }
399    if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
400        buf->inbuf += esize;
401        return 0;
402    }
403
404    start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
405    end = start + esize;
406
407    /* use cached exception object if available */
408    if (buf->excobj == NULL) {
409        buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
410                        (const char *)buf->inbuf_top,
411                        (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
412                        start, end, reason);
413        if (buf->excobj == NULL)
414            goto errorexit;
415    }
416    else
417        if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
418            PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
419            PyUnicodeDecodeError_SetReason(buf->excobj, reason))
420            goto errorexit;
421
422    if (errors == ERROR_STRICT) {
423        PyCodec_StrictErrors(buf->excobj);
424        goto errorexit;
425    }
426
427    retobj = call_error_callback(errors, buf->excobj);
428    if (retobj == NULL)
429        goto errorexit;
430
431    if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
432        !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
433        !(PyInt_Check(PyTuple_GET_ITEM(retobj, 1)) ||
434          PyLong_Check(PyTuple_GET_ITEM(retobj, 1)))) {
435        PyErr_SetString(PyExc_TypeError,
436                        "decoding error handler must return "
437                        "(unicode, int) tuple");
438        goto errorexit;
439    }
440
441    retunisize = PyUnicode_GET_SIZE(retuni);
442    if (retunisize > 0) {
443        REQUIRE_DECODEBUFFER(buf, retunisize);
444        memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),
445                        retunisize * Py_UNICODE_SIZE);
446        buf->outbuf += retunisize;
447    }
448
449    newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
450    if (newpos < 0 && !PyErr_Occurred())
451        newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
452    if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
453        PyErr_Clear();
454        PyErr_Format(PyExc_IndexError,
455                     "position %zd from error handler out of bounds",
456                     newpos);
457        goto errorexit;
458    }
459    buf->inbuf = buf->inbuf_top + newpos;
460    Py_DECREF(retobj);
461    return 0;
462
463errorexit:
464    Py_XDECREF(retobj);
465    return -1;
466}
467
468static PyObject *
469multibytecodec_encode(MultibyteCodec *codec,
470                      MultibyteCodec_State *state,
471                      const Py_UNICODE **data, Py_ssize_t datalen,
472                      PyObject *errors, int flags)
473{
474    MultibyteEncodeBuffer buf;
475    Py_ssize_t finalsize, r = 0;
476
477    if (datalen == 0 && !(flags & MBENC_RESET))
478        return PyString_FromString("");
479
480    buf.excobj = NULL;
481    buf.inbuf = buf.inbuf_top = *data;
482    buf.inbuf_end = buf.inbuf_top + datalen;
483
484    if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
485        PyErr_NoMemory();
486        goto errorexit;
487    }
488
489    buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16);
490    if (buf.outobj == NULL)
491        goto errorexit;
492    buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj);
493    buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);
494
495    while (buf.inbuf < buf.inbuf_end) {
496        Py_ssize_t inleft, outleft;
497
498        /* we don't reuse inleft and outleft here.
499         * error callbacks can relocate the cursor anywhere on buffer*/
500        inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
501        outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
502        r = codec->encode(state, codec->config, &buf.inbuf, inleft,
503                          &buf.outbuf, outleft, flags);
504        if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
505            break;
506        else if (multibytecodec_encerror(codec, state, &buf, errors,r))
507            goto errorexit;
508        else if (r == MBERR_TOOFEW)
509            break;
510    }
511
512    if (codec->encreset != NULL && (flags & MBENC_RESET))
513        for (;;) {
514            Py_ssize_t outleft;
515
516            outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
517            r = codec->encreset(state, codec->config, &buf.outbuf,
518                                outleft);
519            if (r == 0)
520                break;
521            else if (multibytecodec_encerror(codec, state,
522                                             &buf, errors, r))
523                goto errorexit;
524        }
525
526    finalsize = (Py_ssize_t)((char *)buf.outbuf -
527                             PyString_AS_STRING(buf.outobj));
528
529    if (finalsize != PyString_GET_SIZE(buf.outobj))
530        if (_PyString_Resize(&buf.outobj, finalsize) == -1)
531            goto errorexit;
532
533	*data = buf.inbuf;
534    Py_XDECREF(buf.excobj);
535    return buf.outobj;
536
537errorexit:
538    Py_XDECREF(buf.excobj);
539    Py_XDECREF(buf.outobj);
540    return NULL;
541}
542
543static PyObject *
544MultibyteCodec_Encode(MultibyteCodecObject *self,
545                      PyObject *args, PyObject *kwargs)
546{
547    MultibyteCodec_State state;
548    Py_UNICODE *data;
549    PyObject *errorcb, *r, *arg, *ucvt;
550    const char *errors = NULL;
551    Py_ssize_t datalen;
552
553    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",
554                            codeckwarglist, &arg, &errors))
555        return NULL;
556
557    if (PyUnicode_Check(arg))
558        ucvt = NULL;
559    else {
560        arg = ucvt = PyObject_Unicode(arg);
561        if (arg == NULL)
562            return NULL;
563        else if (!PyUnicode_Check(arg)) {
564            PyErr_SetString(PyExc_TypeError,
565                "couldn't convert the object to unicode.");
566            Py_DECREF(ucvt);
567            return NULL;
568        }
569    }
570
571    data = PyUnicode_AS_UNICODE(arg);
572    datalen = PyUnicode_GET_SIZE(arg);
573
574    errorcb = internal_error_callback(errors);
575    if (errorcb == NULL) {
576        Py_XDECREF(ucvt);
577        return NULL;
578    }
579
580    if (self->codec->encinit != NULL &&
581        self->codec->encinit(&state, self->codec->config) != 0)
582        goto errorexit;
583    r = multibytecodec_encode(self->codec, &state,
584                    (const Py_UNICODE **)&data, datalen, errorcb,
585                    MBENC_FLUSH | MBENC_RESET);
586    if (r == NULL)
587        goto errorexit;
588
589    ERROR_DECREF(errorcb);
590    Py_XDECREF(ucvt);
591    return make_tuple(r, datalen);
592
593errorexit:
594    ERROR_DECREF(errorcb);
595    Py_XDECREF(ucvt);
596    return NULL;
597}
598
599static PyObject *
600MultibyteCodec_Decode(MultibyteCodecObject *self,
601                      PyObject *args, PyObject *kwargs)
602{
603    MultibyteCodec_State state;
604    MultibyteDecodeBuffer buf;
605    PyObject *errorcb;
606    Py_buffer pdata;
607    const char *data, *errors = NULL;
608    Py_ssize_t datalen, finalsize;
609
610    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|z:decode",
611                            codeckwarglist, &pdata, &errors))
612        return NULL;
613    data = pdata.buf;
614    datalen = pdata.len;
615
616    errorcb = internal_error_callback(errors);
617    if (errorcb == NULL) {
618        PyBuffer_Release(&pdata);
619        return NULL;
620    }
621
622    if (datalen == 0) {
623        PyBuffer_Release(&pdata);
624        ERROR_DECREF(errorcb);
625        return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
626    }
627
628    buf.excobj = NULL;
629    buf.inbuf = buf.inbuf_top = (unsigned char *)data;
630    buf.inbuf_end = buf.inbuf_top + datalen;
631    buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
632    if (buf.outobj == NULL)
633        goto errorexit;
634    buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
635    buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
636
637    if (self->codec->decinit != NULL &&
638        self->codec->decinit(&state, self->codec->config) != 0)
639        goto errorexit;
640
641    while (buf.inbuf < buf.inbuf_end) {
642        Py_ssize_t inleft, outleft, r;
643
644        inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
645        outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
646
647        r = self->codec->decode(&state, self->codec->config,
648                        &buf.inbuf, inleft, &buf.outbuf, outleft);
649        if (r == 0)
650            break;
651        else if (multibytecodec_decerror(self->codec, &state,
652                                         &buf, errorcb, r))
653            goto errorexit;
654    }
655
656    finalsize = (Py_ssize_t)(buf.outbuf -
657                             PyUnicode_AS_UNICODE(buf.outobj));
658
659    if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
660        if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
661            goto errorexit;
662
663    PyBuffer_Release(&pdata);
664    Py_XDECREF(buf.excobj);
665    ERROR_DECREF(errorcb);
666    return make_tuple(buf.outobj, datalen);
667
668errorexit:
669    PyBuffer_Release(&pdata);
670    ERROR_DECREF(errorcb);
671    Py_XDECREF(buf.excobj);
672    Py_XDECREF(buf.outobj);
673
674    return NULL;
675}
676
677static struct PyMethodDef multibytecodec_methods[] = {
678    {"encode",          (PyCFunction)MultibyteCodec_Encode,
679                    METH_VARARGS | METH_KEYWORDS,
680                    MultibyteCodec_Encode__doc__},
681    {"decode",          (PyCFunction)MultibyteCodec_Decode,
682                    METH_VARARGS | METH_KEYWORDS,
683                    MultibyteCodec_Decode__doc__},
684    {NULL,              NULL},
685};
686
/* tp_dealloc of MultibyteCodec: the object holds no references of its own
 * that this function releases, so only its memory is freed. */
static void
multibytecodec_dealloc(MultibyteCodecObject *self)
{
    PyObject_Del(self);
}
692
/* Type object of MultibyteCodec: a plain (non-GC) type whose only
 * customizations are its deallocator and its encode/decode methods. */
static PyTypeObject MultibyteCodec_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteCodec",                   /* tp_name */
    sizeof(MultibyteCodecObject),       /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /* methods */
    (destructor)multibytecodec_dealloc, /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
    0,                                  /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    multibytecodec_methods,             /* tp_methods */
};
724
725
726/**
727 * Utility functions for stateful codec mechanism
728 */
729
/* View an object pointer as the stateful decoder / encoder context type. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
732
733static PyObject *
734encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
735                        PyObject *unistr, int final)
736{
737    PyObject *ucvt, *r = NULL;
738    Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
739    Py_ssize_t datalen, origpending;
740
741    if (PyUnicode_Check(unistr))
742        ucvt = NULL;
743    else {
744        unistr = ucvt = PyObject_Unicode(unistr);
745        if (unistr == NULL)
746            return NULL;
747        else if (!PyUnicode_Check(unistr)) {
748            PyErr_SetString(PyExc_TypeError,
749                "couldn't convert the object to unicode.");
750            Py_DECREF(ucvt);
751            return NULL;
752        }
753    }
754
755    datalen = PyUnicode_GET_SIZE(unistr);
756    origpending = ctx->pendingsize;
757
758    if (origpending > 0) {
759        if (datalen > PY_SSIZE_T_MAX - ctx->pendingsize) {
760            PyErr_NoMemory();
761            /* inbuf_tmp == NULL */
762            goto errorexit;
763        }
764        inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);
765        if (inbuf_tmp == NULL)
766            goto errorexit;
767        memcpy(inbuf_tmp, ctx->pending,
768            Py_UNICODE_SIZE * ctx->pendingsize);
769        memcpy(inbuf_tmp + ctx->pendingsize,
770            PyUnicode_AS_UNICODE(unistr),
771            Py_UNICODE_SIZE * datalen);
772        datalen += ctx->pendingsize;
773        ctx->pendingsize = 0;
774        inbuf = inbuf_tmp;
775    }
776    else
777        inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
778
779    inbuf_end = inbuf + datalen;
780
781    r = multibytecodec_encode(ctx->codec, &ctx->state,
782                    (const Py_UNICODE **)&inbuf, datalen,
783                    ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
784    if (r == NULL) {
785        /* recover the original pending buffer */
786        if (origpending > 0)
787            memcpy(ctx->pending, inbuf_tmp,
788                Py_UNICODE_SIZE * origpending);
789        ctx->pendingsize = origpending;
790        goto errorexit;
791    }
792
793    if (inbuf < inbuf_end) {
794        ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
795        if (ctx->pendingsize > MAXENCPENDING) {
796            /* normal codecs can't reach here */
797            ctx->pendingsize = 0;
798            PyErr_SetString(PyExc_UnicodeError,
799                            "pending buffer overflow");
800            goto errorexit;
801        }
802        memcpy(ctx->pending, inbuf,
803            ctx->pendingsize * Py_UNICODE_SIZE);
804    }
805
806    if (inbuf_tmp != NULL)
807        PyMem_Del(inbuf_tmp);
808    Py_XDECREF(ucvt);
809    return r;
810
811errorexit:
812    if (inbuf_tmp != NULL)
813        PyMem_Del(inbuf_tmp);
814    Py_XDECREF(r);
815    Py_XDECREF(ucvt);
816    return NULL;
817}
818
819static int
820decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
821                       MultibyteDecodeBuffer *buf)
822{
823    Py_ssize_t npendings;
824
825    npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
826    if (npendings + ctx->pendingsize > MAXDECPENDING ||
827        npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
828            PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
829            return -1;
830    }
831    memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
832    ctx->pendingsize += npendings;
833    return 0;
834}
835
836static int
837decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
838                       Py_ssize_t size)
839{
840    buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
841    buf->inbuf_end = buf->inbuf_top + size;
842    if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
843        buf->outobj = PyUnicode_FromUnicode(NULL, size);
844        if (buf->outobj == NULL)
845            return -1;
846        buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj);
847        buf->outbuf_end = buf->outbuf +
848                          PyUnicode_GET_SIZE(buf->outobj);
849    }
850
851    return 0;
852}
853
854static int
855decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
856                    MultibyteDecodeBuffer *buf)
857{
858    while (buf->inbuf < buf->inbuf_end) {
859        Py_ssize_t inleft, outleft;
860        Py_ssize_t r;
861
862        inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
863        outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
864
865        r = ctx->codec->decode(&ctx->state, ctx->codec->config,
866            &buf->inbuf, inleft, &buf->outbuf, outleft);
867        if (r == 0 || r == MBERR_TOOFEW)
868            break;
869        else if (multibytecodec_decerror(ctx->codec, &ctx->state,
870                                         buf, ctx->errors, r))
871            return -1;
872    }
873    return 0;
874}
875
876
877/**
878 * MultibyteIncrementalEncoder object
879 */
880
881static PyObject *
882mbiencoder_encode(MultibyteIncrementalEncoderObject *self,
883                  PyObject *args, PyObject *kwargs)
884{
885    PyObject *data;
886    int final = 0;
887
888    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode",
889                    incrementalkwarglist, &data, &final))
890        return NULL;
891
892    return encoder_encode_stateful(STATEFUL_ECTX(self), data, final);
893}
894
895static PyObject *
896mbiencoder_reset(MultibyteIncrementalEncoderObject *self)
897{
898    if (self->codec->decreset != NULL &&
899        self->codec->decreset(&self->state, self->codec->config) != 0)
900        return NULL;
901    self->pendingsize = 0;
902
903    Py_RETURN_NONE;
904}
905
906static struct PyMethodDef mbiencoder_methods[] = {
907    {"encode",          (PyCFunction)mbiencoder_encode,
908                    METH_VARARGS | METH_KEYWORDS, NULL},
909    {"reset",           (PyCFunction)mbiencoder_reset,
910                    METH_NOARGS, NULL},
911    {NULL,              NULL},
912};
913
914static PyObject *
915mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
916{
917    MultibyteIncrementalEncoderObject *self;
918    PyObject *codec = NULL;
919    char *errors = NULL;
920
921    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
922                                     incnewkwarglist, &errors))
923        return NULL;
924
925    self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
926    if (self == NULL)
927        return NULL;
928
929    codec = PyObject_GetAttrString((PyObject *)type, "codec");
930    if (codec == NULL)
931        goto errorexit;
932    if (!MultibyteCodec_Check(codec)) {
933        PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
934        goto errorexit;
935    }
936
937    self->codec = ((MultibyteCodecObject *)codec)->codec;
938    self->pendingsize = 0;
939    self->errors = internal_error_callback(errors);
940    if (self->errors == NULL)
941        goto errorexit;
942    if (self->codec->encinit != NULL &&
943        self->codec->encinit(&self->state, self->codec->config) != 0)
944        goto errorexit;
945
946    Py_DECREF(codec);
947    return (PyObject *)self;
948
949errorexit:
950    Py_XDECREF(self);
951    Py_XDECREF(codec);
952    return NULL;
953}
954
/*
 * tp_init slot: deliberately does nothing and always reports success.
 * Argument parsing and state set-up happen in mbiencoder_new.
 */
static int
mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
960
961static int
962mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
963                    visitproc visit, void *arg)
964{
965    if (ERROR_ISCUSTOM(self->errors))
966        Py_VISIT(self->errors);
967    return 0;
968}
969
970static void
971mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
972{
973    PyObject_GC_UnTrack(self);
974    ERROR_DECREF(self->errors);
975    Py_TYPE(self)->tp_free(self);
976}
977
/*
 * Type object for MultibyteIncrementalEncoder.  The type takes part in
 * cyclic GC (Py_TPFLAGS_HAVE_GC) and may be subclassed
 * (Py_TPFLAGS_BASETYPE); instances are created by mbiencoder_new.
 */
static PyTypeObject MultibyteIncrementalEncoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteIncrementalEncoder",      /* tp_name */
    sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbiencoder_dealloc, /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbiencoder_traverse,          /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbiencoder_methods,                 /* tp_methods */
    0,                                  /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbiencoder_init,                    /* tp_init */
    0,                                  /* tp_alloc */
    mbiencoder_new,                     /* tp_new */
};
1020
1021
1022/**
1023 * MultibyteIncrementalDecoder object
1024 */
1025
1026static PyObject *
1027mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
1028                  PyObject *args, PyObject *kwargs)
1029{
1030    MultibyteDecodeBuffer buf;
1031    char *data, *wdata = NULL;
1032    Py_buffer pdata;
1033    Py_ssize_t wsize, finalsize = 0, size, origpending;
1034    int final = 0;
1035
1036    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i:decode",
1037                    incrementalkwarglist, &pdata, &final))
1038        return NULL;
1039    data = pdata.buf;
1040    size = pdata.len;
1041
1042    buf.outobj = buf.excobj = NULL;
1043    origpending = self->pendingsize;
1044
1045    if (self->pendingsize == 0) {
1046        wsize = size;
1047        wdata = data;
1048    }
1049    else {
1050        if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1051            PyErr_NoMemory();
1052            goto errorexit;
1053        }
1054        wsize = size + self->pendingsize;
1055        wdata = PyMem_Malloc(wsize);
1056        if (wdata == NULL)
1057            goto errorexit;
1058        memcpy(wdata, self->pending, self->pendingsize);
1059        memcpy(wdata + self->pendingsize, data, size);
1060        self->pendingsize = 0;
1061    }
1062
1063    if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1064        goto errorexit;
1065
1066    if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1067        goto errorexit;
1068
1069    if (final && buf.inbuf < buf.inbuf_end) {
1070        if (multibytecodec_decerror(self->codec, &self->state,
1071                        &buf, self->errors, MBERR_TOOFEW)) {
1072            /* recover the original pending buffer */
1073            memcpy(self->pending, wdata, origpending);
1074            self->pendingsize = origpending;
1075            goto errorexit;
1076        }
1077    }
1078
1079    if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1080        if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1081            goto errorexit;
1082    }
1083
1084    finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
1085    if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
1086        if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
1087            goto errorexit;
1088
1089    PyBuffer_Release(&pdata);
1090    if (wdata != data)
1091        PyMem_Del(wdata);
1092    Py_XDECREF(buf.excobj);
1093    return buf.outobj;
1094
1095errorexit:
1096    PyBuffer_Release(&pdata);
1097    if (wdata != NULL && wdata != data)
1098        PyMem_Del(wdata);
1099    Py_XDECREF(buf.excobj);
1100    Py_XDECREF(buf.outobj);
1101    return NULL;
1102}
1103
1104static PyObject *
1105mbidecoder_reset(MultibyteIncrementalDecoderObject *self)
1106{
1107    if (self->codec->decreset != NULL &&
1108        self->codec->decreset(&self->state, self->codec->config) != 0)
1109        return NULL;
1110    self->pendingsize = 0;
1111
1112    Py_RETURN_NONE;
1113}
1114
/* Method table of the MultibyteIncrementalDecoder type:
 * decode(input[, final]) and reset(). */
static struct PyMethodDef mbidecoder_methods[] = {
    {"decode",          (PyCFunction)mbidecoder_decode,
                    METH_VARARGS | METH_KEYWORDS, NULL},
    {"reset",           (PyCFunction)mbidecoder_reset,
                    METH_NOARGS, NULL},
    {NULL,              NULL},  /* sentinel */
};
1122
1123static PyObject *
1124mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1125{
1126    MultibyteIncrementalDecoderObject *self;
1127    PyObject *codec = NULL;
1128    char *errors = NULL;
1129
1130    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1131                                     incnewkwarglist, &errors))
1132        return NULL;
1133
1134    self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1135    if (self == NULL)
1136        return NULL;
1137
1138    codec = PyObject_GetAttrString((PyObject *)type, "codec");
1139    if (codec == NULL)
1140        goto errorexit;
1141    if (!MultibyteCodec_Check(codec)) {
1142        PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1143        goto errorexit;
1144    }
1145
1146    self->codec = ((MultibyteCodecObject *)codec)->codec;
1147    self->pendingsize = 0;
1148    self->errors = internal_error_callback(errors);
1149    if (self->errors == NULL)
1150        goto errorexit;
1151    if (self->codec->decinit != NULL &&
1152        self->codec->decinit(&self->state, self->codec->config) != 0)
1153        goto errorexit;
1154
1155    Py_DECREF(codec);
1156    return (PyObject *)self;
1157
1158errorexit:
1159    Py_XDECREF(self);
1160    Py_XDECREF(codec);
1161    return NULL;
1162}
1163
/*
 * tp_init slot: deliberately does nothing and always reports success.
 * Argument parsing and state set-up happen in mbidecoder_new.
 */
static int
mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
1169
1170static int
1171mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1172                    visitproc visit, void *arg)
1173{
1174    if (ERROR_ISCUSTOM(self->errors))
1175        Py_VISIT(self->errors);
1176    return 0;
1177}
1178
1179static void
1180mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1181{
1182    PyObject_GC_UnTrack(self);
1183    ERROR_DECREF(self->errors);
1184    Py_TYPE(self)->tp_free(self);
1185}
1186
/*
 * Type object for MultibyteIncrementalDecoder.  The type takes part in
 * cyclic GC (Py_TPFLAGS_HAVE_GC) and may be subclassed
 * (Py_TPFLAGS_BASETYPE); instances are created by mbidecoder_new.
 */
static PyTypeObject MultibyteIncrementalDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteIncrementalDecoder",      /* tp_name */
    sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbidecoder_dealloc, /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbidecoder_traverse,          /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbidecoder_methods,                 /* tp_methods */
    0,                                  /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbidecoder_init,                    /* tp_init */
    0,                                  /* tp_alloc */
    mbidecoder_new,                     /* tp_new */
};
1229
1230
1231/**
1232 * MultibyteStreamReader object
1233 */
1234
1235static PyObject *
1236mbstreamreader_iread(MultibyteStreamReaderObject *self,
1237                     const char *method, Py_ssize_t sizehint)
1238{
1239    MultibyteDecodeBuffer buf;
1240    PyObject *cres;
1241    Py_ssize_t rsize, finalsize = 0;
1242
1243    if (sizehint == 0)
1244        return PyUnicode_FromUnicode(NULL, 0);
1245
1246    buf.outobj = buf.excobj = NULL;
1247    cres = NULL;
1248
1249    for (;;) {
1250        int endoffile;
1251
1252        if (sizehint < 0)
1253            cres = PyObject_CallMethod(self->stream,
1254                            (char *)method, NULL);
1255        else
1256            cres = PyObject_CallMethod(self->stream,
1257                            (char *)method, "i", sizehint);
1258        if (cres == NULL)
1259            goto errorexit;
1260
1261        if (!PyString_Check(cres)) {
1262            PyErr_SetString(PyExc_TypeError,
1263                            "stream function returned a "
1264                            "non-string object");
1265            goto errorexit;
1266        }
1267
1268        endoffile = (PyString_GET_SIZE(cres) == 0);
1269
1270        if (self->pendingsize > 0) {
1271            PyObject *ctr;
1272            char *ctrdata;
1273
1274            if (PyString_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1275                PyErr_NoMemory();
1276                goto errorexit;
1277        }
1278                    rsize = PyString_GET_SIZE(cres) + self->pendingsize;
1279                    ctr = PyString_FromStringAndSize(NULL, rsize);
1280                    if (ctr == NULL)
1281                            goto errorexit;
1282                    ctrdata = PyString_AS_STRING(ctr);
1283                    memcpy(ctrdata, self->pending, self->pendingsize);
1284                    memcpy(ctrdata + self->pendingsize,
1285                            PyString_AS_STRING(cres),
1286                            PyString_GET_SIZE(cres));
1287                    Py_DECREF(cres);
1288                    cres = ctr;
1289                    self->pendingsize = 0;
1290        }
1291
1292        rsize = PyString_GET_SIZE(cres);
1293        if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres),
1294                                   rsize) != 0)
1295            goto errorexit;
1296
1297        if (rsize > 0 && decoder_feed_buffer(
1298                        (MultibyteStatefulDecoderContext *)self, &buf))
1299            goto errorexit;
1300
1301        if (endoffile || sizehint < 0) {
1302            if (buf.inbuf < buf.inbuf_end &&
1303                multibytecodec_decerror(self->codec, &self->state,
1304                            &buf, self->errors, MBERR_TOOFEW))
1305                goto errorexit;
1306        }
1307
1308        if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1309            if (decoder_append_pending(STATEFUL_DCTX(self),
1310                                       &buf) != 0)
1311                goto errorexit;
1312        }
1313
1314        finalsize = (Py_ssize_t)(buf.outbuf -
1315                        PyUnicode_AS_UNICODE(buf.outobj));
1316        Py_DECREF(cres);
1317        cres = NULL;
1318
1319        if (sizehint < 0 || finalsize != 0 || rsize == 0)
1320            break;
1321
1322        sizehint = 1; /* read 1 more byte and retry */
1323    }
1324
1325    if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
1326        if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
1327            goto errorexit;
1328
1329    Py_XDECREF(cres);
1330    Py_XDECREF(buf.excobj);
1331    return buf.outobj;
1332
1333errorexit:
1334    Py_XDECREF(cres);
1335    Py_XDECREF(buf.excobj);
1336    Py_XDECREF(buf.outobj);
1337    return NULL;
1338}
1339
1340static PyObject *
1341mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
1342{
1343    PyObject *sizeobj = NULL;
1344    Py_ssize_t size;
1345
1346    if (!PyArg_UnpackTuple(args, "read", 0, 1, &sizeobj))
1347        return NULL;
1348
1349    if (sizeobj == Py_None || sizeobj == NULL)
1350        size = -1;
1351    else if (PyInt_Check(sizeobj))
1352        size = PyInt_AsSsize_t(sizeobj);
1353    else {
1354        PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1355        return NULL;
1356    }
1357
1358    return mbstreamreader_iread(self, "read", size);
1359}
1360
1361static PyObject *
1362mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
1363{
1364    PyObject *sizeobj = NULL;
1365    Py_ssize_t size;
1366
1367    if (!PyArg_UnpackTuple(args, "readline", 0, 1, &sizeobj))
1368        return NULL;
1369
1370    if (sizeobj == Py_None || sizeobj == NULL)
1371        size = -1;
1372    else if (PyInt_Check(sizeobj))
1373        size = PyInt_AsSsize_t(sizeobj);
1374    else {
1375        PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1376        return NULL;
1377    }
1378
1379    return mbstreamreader_iread(self, "readline", size);
1380}
1381
1382static PyObject *
1383mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
1384{
1385    PyObject *sizehintobj = NULL, *r, *sr;
1386    Py_ssize_t sizehint;
1387
1388    if (!PyArg_UnpackTuple(args, "readlines", 0, 1, &sizehintobj))
1389        return NULL;
1390
1391    if (sizehintobj == Py_None || sizehintobj == NULL)
1392        sizehint = -1;
1393    else if (PyInt_Check(sizehintobj))
1394        sizehint = PyInt_AsSsize_t(sizehintobj);
1395    else {
1396        PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1397        return NULL;
1398    }
1399
1400    r = mbstreamreader_iread(self, "read", sizehint);
1401    if (r == NULL)
1402        return NULL;
1403
1404    sr = PyUnicode_Splitlines(r, 1);
1405    Py_DECREF(r);
1406    return sr;
1407}
1408
1409static PyObject *
1410mbstreamreader_reset(MultibyteStreamReaderObject *self)
1411{
1412    if (self->codec->decreset != NULL &&
1413        self->codec->decreset(&self->state, self->codec->config) != 0)
1414        return NULL;
1415    self->pendingsize = 0;
1416
1417    Py_RETURN_NONE;
1418}
1419
/* Method table of the MultibyteStreamReader type:
 * read, readline, readlines (each with an optional size) and reset. */
static struct PyMethodDef mbstreamreader_methods[] = {
    {"read",            (PyCFunction)mbstreamreader_read,
                    METH_VARARGS, NULL},
    {"readline",        (PyCFunction)mbstreamreader_readline,
                    METH_VARARGS, NULL},
    {"readlines",       (PyCFunction)mbstreamreader_readlines,
                    METH_VARARGS, NULL},
    {"reset",           (PyCFunction)mbstreamreader_reset,
                    METH_NOARGS, NULL},
    {NULL,              NULL},  /* sentinel */
};
1431
/* Data members of MultibyteStreamReader: the wrapped stream object is
 * exposed as the read-only attribute `stream'. */
static PyMemberDef mbstreamreader_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamReaderObject, stream),
                    READONLY, NULL},
    {NULL,}     /* sentinel */
};
1438
1439static PyObject *
1440mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1441{
1442    MultibyteStreamReaderObject *self;
1443    PyObject *stream, *codec = NULL;
1444    char *errors = NULL;
1445
1446    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1447                            streamkwarglist, &stream, &errors))
1448        return NULL;
1449
1450    self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1451    if (self == NULL)
1452        return NULL;
1453
1454    codec = PyObject_GetAttrString((PyObject *)type, "codec");
1455    if (codec == NULL)
1456        goto errorexit;
1457    if (!MultibyteCodec_Check(codec)) {
1458        PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1459        goto errorexit;
1460    }
1461
1462    self->codec = ((MultibyteCodecObject *)codec)->codec;
1463    self->stream = stream;
1464    Py_INCREF(stream);
1465    self->pendingsize = 0;
1466    self->errors = internal_error_callback(errors);
1467    if (self->errors == NULL)
1468        goto errorexit;
1469    if (self->codec->decinit != NULL &&
1470        self->codec->decinit(&self->state, self->codec->config) != 0)
1471        goto errorexit;
1472
1473    Py_DECREF(codec);
1474    return (PyObject *)self;
1475
1476errorexit:
1477    Py_XDECREF(self);
1478    Py_XDECREF(codec);
1479    return NULL;
1480}
1481
/*
 * tp_init slot: deliberately does nothing and always reports success.
 * Argument parsing and state set-up happen in mbstreamreader_new.
 */
static int
mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
1487
1488static int
1489mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1490                        visitproc visit, void *arg)
1491{
1492    if (ERROR_ISCUSTOM(self->errors))
1493        Py_VISIT(self->errors);
1494    Py_VISIT(self->stream);
1495    return 0;
1496}
1497
1498static void
1499mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1500{
1501    PyObject_GC_UnTrack(self);
1502    ERROR_DECREF(self->errors);
1503    Py_XDECREF(self->stream);
1504    Py_TYPE(self)->tp_free(self);
1505}
1506
/*
 * Type object for MultibyteStreamReader.  The type takes part in cyclic GC
 * (Py_TPFLAGS_HAVE_GC) and may be subclassed (Py_TPFLAGS_BASETYPE);
 * instances are created by mbstreamreader_new.
 */
static PyTypeObject MultibyteStreamReader_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteStreamReader",            /* tp_name */
    sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbstreamreader_dealloc, /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbstreamreader_traverse,      /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbstreamreader_methods,             /* tp_methods */
    mbstreamreader_members,             /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbstreamreader_init,                /* tp_init */
    0,                                  /* tp_alloc */
    mbstreamreader_new,                 /* tp_new */
};
1549
1550
1551/**
1552 * MultibyteStreamWriter object
1553 */
1554
1555static int
1556mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1557                      PyObject *unistr)
1558{
1559    PyObject *str, *wr;
1560
1561    str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1562    if (str == NULL)
1563        return -1;
1564
1565    wr = PyObject_CallMethod(self->stream, "write", "O", str);
1566    Py_DECREF(str);
1567    if (wr == NULL)
1568        return -1;
1569
1570    Py_DECREF(wr);
1571    return 0;
1572}
1573
1574static PyObject *
1575mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *strobj)
1576{
1577    if (mbstreamwriter_iwrite(self, strobj))
1578        return NULL;
1579    else
1580        Py_RETURN_NONE;
1581}
1582
1583static PyObject *
1584mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines)
1585{
1586    PyObject *strobj;
1587    int i, r;
1588
1589    if (!PySequence_Check(lines)) {
1590        PyErr_SetString(PyExc_TypeError,
1591                        "arg must be a sequence object");
1592        return NULL;
1593    }
1594
1595    for (i = 0; i < PySequence_Length(lines); i++) {
1596        /* length can be changed even within this loop */
1597        strobj = PySequence_GetItem(lines, i);
1598        if (strobj == NULL)
1599            return NULL;
1600
1601        r = mbstreamwriter_iwrite(self, strobj);
1602        Py_DECREF(strobj);
1603        if (r == -1)
1604            return NULL;
1605    }
1606
1607    Py_RETURN_NONE;
1608}
1609
1610static PyObject *
1611mbstreamwriter_reset(MultibyteStreamWriterObject *self)
1612{
1613    const Py_UNICODE *pending;
1614    PyObject *pwrt;
1615
1616    pending = self->pending;
1617    pwrt = multibytecodec_encode(self->codec, &self->state,
1618                    &pending, self->pendingsize, self->errors,
1619                    MBENC_FLUSH | MBENC_RESET);
1620    /* some pending buffer can be truncated when UnicodeEncodeError is
1621     * raised on 'strict' mode. but, 'reset' method is designed to
1622     * reset the pending buffer or states so failed string sequence
1623     * ought to be missed */
1624    self->pendingsize = 0;
1625    if (pwrt == NULL)
1626        return NULL;
1627
1628    if (PyString_Size(pwrt) > 0) {
1629        PyObject *wr;
1630        wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
1631        if (wr == NULL) {
1632            Py_DECREF(pwrt);
1633            return NULL;
1634        }
1635    }
1636    Py_DECREF(pwrt);
1637
1638    Py_RETURN_NONE;
1639}
1640
1641static PyObject *
1642mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1643{
1644    MultibyteStreamWriterObject *self;
1645    PyObject *stream, *codec = NULL;
1646    char *errors = NULL;
1647
1648    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1649                            streamkwarglist, &stream, &errors))
1650        return NULL;
1651
1652    self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1653    if (self == NULL)
1654        return NULL;
1655
1656    codec = PyObject_GetAttrString((PyObject *)type, "codec");
1657    if (codec == NULL)
1658        goto errorexit;
1659    if (!MultibyteCodec_Check(codec)) {
1660        PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1661        goto errorexit;
1662    }
1663
1664    self->codec = ((MultibyteCodecObject *)codec)->codec;
1665    self->stream = stream;
1666    Py_INCREF(stream);
1667    self->pendingsize = 0;
1668    self->errors = internal_error_callback(errors);
1669    if (self->errors == NULL)
1670        goto errorexit;
1671    if (self->codec->encinit != NULL &&
1672        self->codec->encinit(&self->state, self->codec->config) != 0)
1673        goto errorexit;
1674
1675    Py_DECREF(codec);
1676    return (PyObject *)self;
1677
1678errorexit:
1679    Py_XDECREF(self);
1680    Py_XDECREF(codec);
1681    return NULL;
1682}
1683
/*
 * tp_init slot: deliberately does nothing and always reports success.
 * Argument parsing and state set-up happen in mbstreamwriter_new.
 */
static int
mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
1689
1690static int
1691mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1692                        visitproc visit, void *arg)
1693{
1694    if (ERROR_ISCUSTOM(self->errors))
1695        Py_VISIT(self->errors);
1696    Py_VISIT(self->stream);
1697    return 0;
1698}
1699
1700static void
1701mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1702{
1703    PyObject_GC_UnTrack(self);
1704    ERROR_DECREF(self->errors);
1705    Py_XDECREF(self->stream);
1706    Py_TYPE(self)->tp_free(self);
1707}
1708
/* Method table of the MultibyteStreamWriter type: write(obj) and
 * writelines(seq) each take exactly one argument (METH_O); reset() takes
 * none. */
static struct PyMethodDef mbstreamwriter_methods[] = {
    {"write",           (PyCFunction)mbstreamwriter_write,
                    METH_O, NULL},
    {"writelines",      (PyCFunction)mbstreamwriter_writelines,
                    METH_O, NULL},
    {"reset",           (PyCFunction)mbstreamwriter_reset,
                    METH_NOARGS, NULL},
    {NULL,              NULL},  /* sentinel */
};
1718
/* Data members of MultibyteStreamWriter: the wrapped stream object is
 * exposed as the read-only attribute `stream'. */
static PyMemberDef mbstreamwriter_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamWriterObject, stream),
                    READONLY, NULL},
    {NULL,}     /* sentinel */
};
1725
/*
 * Type object for MultibyteStreamWriter.  The type takes part in cyclic GC
 * (Py_TPFLAGS_HAVE_GC) and may be subclassed (Py_TPFLAGS_BASETYPE);
 * instances are created by mbstreamwriter_new.
 */
static PyTypeObject MultibyteStreamWriter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "MultibyteStreamWriter",            /* tp_name */
    sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
    0,                                  /* tp_itemsize */
    /*  methods  */
    (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    0,                                  /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,          /* tp_flags */
    0,                                  /* tp_doc */
    (traverseproc)mbstreamwriter_traverse,      /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    mbstreamwriter_methods,             /* tp_methods */
    mbstreamwriter_members,             /* tp_members */
    codecctx_getsets,                   /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    mbstreamwriter_init,                /* tp_init */
    0,                                  /* tp_alloc */
    mbstreamwriter_new,                 /* tp_new */
};
1768
1769
1770/**
1771 * Exposed factory function
1772 */
1773
1774static PyObject *
1775__create_codec(PyObject *ignore, PyObject *arg)
1776{
1777    MultibyteCodecObject *self;
1778    MultibyteCodec *codec;
1779
1780    if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1781        PyErr_SetString(PyExc_ValueError, "argument type invalid");
1782        return NULL;
1783    }
1784
1785    codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1786    if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1787        return NULL;
1788
1789    self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
1790    if (self == NULL)
1791        return NULL;
1792    self->codec = codec;
1793
1794    return (PyObject *)self;
1795}
1796
/* Module-level function table: only the codec factory is exported. */
static struct PyMethodDef __methods[] = {
    {"__create_codec", (PyCFunction)__create_codec, METH_O},
    {NULL, NULL},       /* sentinel */
};
1801
1802PyMODINIT_FUNC
1803init_multibytecodec(void)
1804{
1805    int i;
1806    PyObject *m;
1807    PyTypeObject *typelist[] = {
1808        &MultibyteIncrementalEncoder_Type,
1809        &MultibyteIncrementalDecoder_Type,
1810        &MultibyteStreamReader_Type,
1811        &MultibyteStreamWriter_Type,
1812        NULL
1813    };
1814
1815    if (PyType_Ready(&MultibyteCodec_Type) < 0)
1816        return;
1817
1818    m = Py_InitModule("_multibytecodec", __methods);
1819    if (m == NULL)
1820        return;
1821
1822    for (i = 0; typelist[i] != NULL; i++) {
1823        if (PyType_Ready(typelist[i]) < 0)
1824            return;
1825        Py_INCREF(typelist[i]);
1826        PyModule_AddObject(m, typelist[i]->tp_name,
1827                           (PyObject *)typelist[i]);
1828    }
1829
1830    if (PyErr_Occurred())
1831        Py_FatalError("can't initialize the _multibytecodec module");
1832}
1833