codecs.c revision aacfcccdc39b074521d3e5d4b5a1b1e020662366
1/* ------------------------------------------------------------------------
2
3   Python Codec Registry and support functions
4
5Written by Marc-Andre Lemburg (mal@lemburg.com).
6
7Copyright (c) Corporation for National Research Initiatives.
8
9   ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include "ucnhash.h"
13#include <ctype.h>
14
/* Lowercase hexadecimal digits, indexed by nibble value (0-15).  Used by the
   backslashreplace and namereplace error handlers in this file to emit
   \x, \u and \U escapes. */
const char *Py_hexdigits = "0123456789abcdef";
16
17/* --- Codec Registry ----------------------------------------------------- */
18
19/* Import the standard encodings package which will register the first
20   codec search function.
21
   This is done in a lazy way so that the Unicode implementation does
   not slow down the startup of scripts that do not need it.
24
25   ImportErrors are silently ignored by this function. Only one try is
26   made.
27
28*/
29
/* Forward declaration.  Callers in this file invoke it when
   interp->codec_search_path is still NULL and treat a non-zero return
   value as failure. */
static int _PyCodecRegistry_Init(void);
31
32int PyCodec_Register(PyObject *search_function)
33{
34    PyInterpreterState *interp = PyThreadState_GET()->interp;
35    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
36        goto onError;
37    if (search_function == NULL) {
38        PyErr_BadArgument();
39        goto onError;
40    }
41    if (!PyCallable_Check(search_function)) {
42        PyErr_SetString(PyExc_TypeError, "argument must be callable");
43        goto onError;
44    }
45    return PyList_Append(interp->codec_search_path, search_function);
46
47 onError:
48    return -1;
49}
50
51/* Convert a string to a normalized Python string: all characters are
52   converted to lower case, spaces are replaced with underscores. */
53
54static
55PyObject *normalizestring(const char *string)
56{
57    size_t i;
58    size_t len = strlen(string);
59    char *p;
60    PyObject *v;
61
62    if (len > PY_SSIZE_T_MAX) {
63        PyErr_SetString(PyExc_OverflowError, "string is too large");
64        return NULL;
65    }
66
67    p = PyMem_Malloc(len + 1);
68    if (p == NULL)
69        return PyErr_NoMemory();
70    for (i = 0; i < len; i++) {
71        char ch = string[i];
72        if (ch == ' ')
73            ch = '-';
74        else
75            ch = Py_TOLOWER(Py_CHARMASK(ch));
76        p[i] = ch;
77    }
78    p[i] = '\0';
79    v = PyUnicode_FromString(p);
80    if (v == NULL)
81        return NULL;
82    PyMem_Free(p);
83    return v;
84}
85
86/* Lookup the given encoding and return a tuple providing the codec
87   facilities.
88
89   The encoding string is looked up converted to all lower-case
90   characters. This makes encodings looked up through this mechanism
91   effectively case-insensitive.
92
93   If no codec is found, a LookupError is set and NULL returned.
94
95   As side effect, this tries to load the encodings package, if not
96   yet done. This is part of the lazy load strategy for the encodings
97   package.
98
99*/
100
101PyObject *_PyCodec_Lookup(const char *encoding)
102{
103    PyInterpreterState *interp;
104    PyObject *result, *args = NULL, *v;
105    Py_ssize_t i, len;
106
107    if (encoding == NULL) {
108        PyErr_BadArgument();
109        goto onError;
110    }
111
112    interp = PyThreadState_GET()->interp;
113    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
114        goto onError;
115
116    /* Convert the encoding to a normalized Python string: all
117       characters are converted to lower case, spaces and hyphens are
118       replaced with underscores. */
119    v = normalizestring(encoding);
120    if (v == NULL)
121        goto onError;
122    PyUnicode_InternInPlace(&v);
123
124    /* First, try to lookup the name in the registry dictionary */
125    result = PyDict_GetItem(interp->codec_search_cache, v);
126    if (result != NULL) {
127        Py_INCREF(result);
128        Py_DECREF(v);
129        return result;
130    }
131
132    /* Next, scan the search functions in order of registration */
133    args = PyTuple_New(1);
134    if (args == NULL)
135        goto onError;
136    PyTuple_SET_ITEM(args,0,v);
137
138    len = PyList_Size(interp->codec_search_path);
139    if (len < 0)
140        goto onError;
141    if (len == 0) {
142        PyErr_SetString(PyExc_LookupError,
143                        "no codec search functions registered: "
144                        "can't find encoding");
145        goto onError;
146    }
147
148    for (i = 0; i < len; i++) {
149        PyObject *func;
150
151        func = PyList_GetItem(interp->codec_search_path, i);
152        if (func == NULL)
153            goto onError;
154        result = PyEval_CallObject(func, args);
155        if (result == NULL)
156            goto onError;
157        if (result == Py_None) {
158            Py_DECREF(result);
159            continue;
160        }
161        if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
162            PyErr_SetString(PyExc_TypeError,
163                            "codec search functions must return 4-tuples");
164            Py_DECREF(result);
165            goto onError;
166        }
167        break;
168    }
169    if (i == len) {
170        /* XXX Perhaps we should cache misses too ? */
171        PyErr_Format(PyExc_LookupError,
172                     "unknown encoding: %s", encoding);
173        goto onError;
174    }
175
176    /* Cache and return the result */
177    if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
178        Py_DECREF(result);
179        goto onError;
180    }
181    Py_DECREF(args);
182    return result;
183
184 onError:
185    Py_XDECREF(args);
186    return NULL;
187}
188
189int _PyCodec_Forget(const char *encoding)
190{
191    PyInterpreterState *interp;
192    PyObject *v;
193    int result;
194
195    interp = PyThreadState_GET()->interp;
196    if (interp->codec_search_path == NULL) {
197        return -1;
198    }
199
200    /* Convert the encoding to a normalized Python string: all
201       characters are converted to lower case, spaces and hyphens are
202       replaced with underscores. */
203    v = normalizestring(encoding);
204    if (v == NULL) {
205        return -1;
206    }
207
208    /* Drop the named codec from the internal cache */
209    result = PyDict_DelItem(interp->codec_search_cache, v);
210    Py_DECREF(v);
211
212    return result;
213}
214
215/* Codec registry encoding check API. */
216
217int PyCodec_KnownEncoding(const char *encoding)
218{
219    PyObject *codecs;
220
221    codecs = _PyCodec_Lookup(encoding);
222    if (!codecs) {
223        PyErr_Clear();
224        return 0;
225    }
226    else {
227        Py_DECREF(codecs);
228        return 1;
229    }
230}
231
232static
233PyObject *args_tuple(PyObject *object,
234                     const char *errors)
235{
236    PyObject *args;
237
238    args = PyTuple_New(1 + (errors != NULL));
239    if (args == NULL)
240        return NULL;
241    Py_INCREF(object);
242    PyTuple_SET_ITEM(args,0,object);
243    if (errors) {
244        PyObject *v;
245
246        v = PyUnicode_FromString(errors);
247        if (v == NULL) {
248            Py_DECREF(args);
249            return NULL;
250        }
251        PyTuple_SET_ITEM(args, 1, v);
252    }
253    return args;
254}
255
256/* Helper function to get a codec item */
257
258static
259PyObject *codec_getitem(const char *encoding, int index)
260{
261    PyObject *codecs;
262    PyObject *v;
263
264    codecs = _PyCodec_Lookup(encoding);
265    if (codecs == NULL)
266        return NULL;
267    v = PyTuple_GET_ITEM(codecs, index);
268    Py_DECREF(codecs);
269    Py_INCREF(v);
270    return v;
271}
272
273/* Helper functions to create an incremental codec. */
274static
275PyObject *codec_makeincrementalcodec(PyObject *codec_info,
276                                     const char *errors,
277                                     const char *attrname)
278{
279    PyObject *ret, *inccodec;
280
281    inccodec = PyObject_GetAttrString(codec_info, attrname);
282    if (inccodec == NULL)
283        return NULL;
284    if (errors)
285        ret = PyObject_CallFunction(inccodec, "s", errors);
286    else
287        ret = PyObject_CallFunction(inccodec, NULL);
288    Py_DECREF(inccodec);
289    return ret;
290}
291
292static
293PyObject *codec_getincrementalcodec(const char *encoding,
294                                    const char *errors,
295                                    const char *attrname)
296{
297    PyObject *codec_info, *ret;
298
299    codec_info = _PyCodec_Lookup(encoding);
300    if (codec_info == NULL)
301        return NULL;
302    ret = codec_makeincrementalcodec(codec_info, errors, attrname);
303    Py_DECREF(codec_info);
304    return ret;
305}
306
307/* Helper function to create a stream codec. */
308
309static
310PyObject *codec_getstreamcodec(const char *encoding,
311                               PyObject *stream,
312                               const char *errors,
313                               const int index)
314{
315    PyObject *codecs, *streamcodec, *codeccls;
316
317    codecs = _PyCodec_Lookup(encoding);
318    if (codecs == NULL)
319        return NULL;
320
321    codeccls = PyTuple_GET_ITEM(codecs, index);
322    if (errors != NULL)
323        streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
324    else
325        streamcodec = PyObject_CallFunction(codeccls, "O", stream);
326    Py_DECREF(codecs);
327    return streamcodec;
328}
329
330/* Helpers to work with the result of _PyCodec_Lookup
331
332 */
333PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
334                                             const char *errors)
335{
336    return codec_makeincrementalcodec(codec_info, errors,
337                                      "incrementaldecoder");
338}
339
340PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
341                                             const char *errors)
342{
343    return codec_makeincrementalcodec(codec_info, errors,
344                                      "incrementalencoder");
345}
346
347
348/* Convenience APIs to query the Codec registry.
349
350   All APIs return a codec object with incremented refcount.
351
352 */
353
354PyObject *PyCodec_Encoder(const char *encoding)
355{
356    return codec_getitem(encoding, 0);
357}
358
359PyObject *PyCodec_Decoder(const char *encoding)
360{
361    return codec_getitem(encoding, 1);
362}
363
364PyObject *PyCodec_IncrementalEncoder(const char *encoding,
365                                     const char *errors)
366{
367    return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
368}
369
370PyObject *PyCodec_IncrementalDecoder(const char *encoding,
371                                     const char *errors)
372{
373    return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
374}
375
376PyObject *PyCodec_StreamReader(const char *encoding,
377                               PyObject *stream,
378                               const char *errors)
379{
380    return codec_getstreamcodec(encoding, stream, errors, 2);
381}
382
383PyObject *PyCodec_StreamWriter(const char *encoding,
384                               PyObject *stream,
385                               const char *errors)
386{
387    return codec_getstreamcodec(encoding, stream, errors, 3);
388}
389
/* Helper that tries to make the reported exception chain name the codec
 * operation that failed, without changing the type of the exception raised.
 *
 * operation is the verb used in the message ("encoding" or "decoding" at
 * the call sites in this file); encoding is the codec name.
 */
static void
wrap_codec_error(const char *operation,
                 const char *encoding)
{
    /* Per the original note on this call: TrySetFromCause replaces the
     * active exception with a suitably updated clone if it can, and leaves
     * the original exception alone otherwise.
     */
    static const char message_format[] = "%s with '%s' codec failed";

    _PyErr_TrySetFromCause(message_format, operation, encoding);
}
405
406/* Encode an object (e.g. an Unicode object) using the given encoding
407   and return the resulting encoded object (usually a Python string).
408
409   errors is passed to the encoder factory as argument if non-NULL. */
410
411static PyObject *
412_PyCodec_EncodeInternal(PyObject *object,
413                        PyObject *encoder,
414                        const char *encoding,
415                        const char *errors)
416{
417    PyObject *args = NULL, *result = NULL;
418    PyObject *v = NULL;
419
420    args = args_tuple(object, errors);
421    if (args == NULL)
422        goto onError;
423
424    result = PyEval_CallObject(encoder, args);
425    if (result == NULL) {
426        wrap_codec_error("encoding", encoding);
427        goto onError;
428    }
429
430    if (!PyTuple_Check(result) ||
431        PyTuple_GET_SIZE(result) != 2) {
432        PyErr_SetString(PyExc_TypeError,
433                        "encoder must return a tuple (object, integer)");
434        goto onError;
435    }
436    v = PyTuple_GET_ITEM(result,0);
437    Py_INCREF(v);
438    /* We don't check or use the second (integer) entry. */
439
440    Py_DECREF(args);
441    Py_DECREF(encoder);
442    Py_DECREF(result);
443    return v;
444
445 onError:
446    Py_XDECREF(result);
447    Py_XDECREF(args);
448    Py_XDECREF(encoder);
449    return NULL;
450}
451
452/* Decode an object (usually a Python string) using the given encoding
453   and return an equivalent object (e.g. an Unicode object).
454
455   errors is passed to the decoder factory as argument if non-NULL. */
456
457static PyObject *
458_PyCodec_DecodeInternal(PyObject *object,
459                        PyObject *decoder,
460                        const char *encoding,
461                        const char *errors)
462{
463    PyObject *args = NULL, *result = NULL;
464    PyObject *v;
465
466    args = args_tuple(object, errors);
467    if (args == NULL)
468        goto onError;
469
470    result = PyEval_CallObject(decoder,args);
471    if (result == NULL) {
472        wrap_codec_error("decoding", encoding);
473        goto onError;
474    }
475    if (!PyTuple_Check(result) ||
476        PyTuple_GET_SIZE(result) != 2) {
477        PyErr_SetString(PyExc_TypeError,
478                        "decoder must return a tuple (object,integer)");
479        goto onError;
480    }
481    v = PyTuple_GET_ITEM(result,0);
482    Py_INCREF(v);
483    /* We don't check or use the second (integer) entry. */
484
485    Py_DECREF(args);
486    Py_DECREF(decoder);
487    Py_DECREF(result);
488    return v;
489
490 onError:
491    Py_XDECREF(args);
492    Py_XDECREF(decoder);
493    Py_XDECREF(result);
494    return NULL;
495}
496
497/* Generic encoding/decoding API */
498PyObject *PyCodec_Encode(PyObject *object,
499                         const char *encoding,
500                         const char *errors)
501{
502    PyObject *encoder;
503
504    encoder = PyCodec_Encoder(encoding);
505    if (encoder == NULL)
506        return NULL;
507
508    return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
509}
510
511PyObject *PyCodec_Decode(PyObject *object,
512                         const char *encoding,
513                         const char *errors)
514{
515    PyObject *decoder;
516
517    decoder = PyCodec_Decoder(encoding);
518    if (decoder == NULL)
519        return NULL;
520
521    return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
522}
523
524/* Text encoding/decoding API */
525PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
526                                       const char *alternate_command)
527{
528    _Py_IDENTIFIER(_is_text_encoding);
529    PyObject *codec;
530    PyObject *attr;
531    int is_text_codec;
532
533    codec = _PyCodec_Lookup(encoding);
534    if (codec == NULL)
535        return NULL;
536
537    /* Backwards compatibility: assume any raw tuple describes a text
538     * encoding, and the same for anything lacking the private
539     * attribute.
540     */
541    if (!PyTuple_CheckExact(codec)) {
542        attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding);
543        if (attr == NULL) {
544            if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
545                PyErr_Clear();
546            } else {
547                Py_DECREF(codec);
548                return NULL;
549            }
550        } else {
551            is_text_codec = PyObject_IsTrue(attr);
552            Py_DECREF(attr);
553            if (!is_text_codec) {
554                Py_DECREF(codec);
555                PyErr_Format(PyExc_LookupError,
556                             "'%.400s' is not a text encoding; "
557                             "use %s to handle arbitrary codecs",
558                             encoding, alternate_command);
559                return NULL;
560            }
561        }
562    }
563
564    /* This appears to be a valid text encoding */
565    return codec;
566}
567
568
569static
570PyObject *codec_getitem_checked(const char *encoding,
571                                const char *alternate_command,
572                                int index)
573{
574    PyObject *codec;
575    PyObject *v;
576
577    codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
578    if (codec == NULL)
579        return NULL;
580
581    v = PyTuple_GET_ITEM(codec, index);
582    Py_INCREF(v);
583    Py_DECREF(codec);
584    return v;
585}
586
587static PyObject * _PyCodec_TextEncoder(const char *encoding)
588{
589    return codec_getitem_checked(encoding, "codecs.encode()", 0);
590}
591
592static PyObject * _PyCodec_TextDecoder(const char *encoding)
593{
594    return codec_getitem_checked(encoding, "codecs.decode()", 1);
595}
596
597PyObject *_PyCodec_EncodeText(PyObject *object,
598                              const char *encoding,
599                              const char *errors)
600{
601    PyObject *encoder;
602
603    encoder = _PyCodec_TextEncoder(encoding);
604    if (encoder == NULL)
605        return NULL;
606
607    return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
608}
609
610PyObject *_PyCodec_DecodeText(PyObject *object,
611                              const char *encoding,
612                              const char *errors)
613{
614    PyObject *decoder;
615
616    decoder = _PyCodec_TextDecoder(encoding);
617    if (decoder == NULL)
618        return NULL;
619
620    return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
621}
622
623/* Register the error handling callback function error under the name
624   name. This function will be called by the codec when it encounters
625   an unencodable characters/undecodable bytes and doesn't know the
626   callback name, when name is specified as the error parameter
627   in the call to the encode/decode function.
628   Return 0 on success, -1 on error */
629int PyCodec_RegisterError(const char *name, PyObject *error)
630{
631    PyInterpreterState *interp = PyThreadState_GET()->interp;
632    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
633        return -1;
634    if (!PyCallable_Check(error)) {
635        PyErr_SetString(PyExc_TypeError, "handler must be callable");
636        return -1;
637    }
638    return PyDict_SetItemString(interp->codec_error_registry,
639                                name, error);
640}
641
642/* Lookup the error handling callback function registered under the
643   name error. As a special case NULL can be passed, in which case
644   the error handling callback for strict encoding will be returned. */
645PyObject *PyCodec_LookupError(const char *name)
646{
647    PyObject *handler = NULL;
648
649    PyInterpreterState *interp = PyThreadState_GET()->interp;
650    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
651        return NULL;
652
653    if (name==NULL)
654        name = "strict";
655    handler = PyDict_GetItemString(interp->codec_error_registry, name);
656    if (!handler)
657        PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
658    else
659        Py_INCREF(handler);
660    return handler;
661}
662
663static void wrong_exception_type(PyObject *exc)
664{
665    _Py_IDENTIFIER(__class__);
666    _Py_IDENTIFIER(__name__);
667    PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__);
668    if (type != NULL) {
669        PyObject *name = _PyObject_GetAttrId(type, &PyId___name__);
670        Py_DECREF(type);
671        if (name != NULL) {
672            PyErr_Format(PyExc_TypeError,
673                         "don't know how to handle %S in error callback", name);
674            Py_DECREF(name);
675        }
676    }
677}
678
679PyObject *PyCodec_StrictErrors(PyObject *exc)
680{
681    if (PyExceptionInstance_Check(exc))
682        PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
683    else
684        PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
685    return NULL;
686}
687
688
689PyObject *PyCodec_IgnoreErrors(PyObject *exc)
690{
691    Py_ssize_t end;
692    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
693        if (PyUnicodeEncodeError_GetEnd(exc, &end))
694            return NULL;
695    }
696    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
697        if (PyUnicodeDecodeError_GetEnd(exc, &end))
698            return NULL;
699    }
700    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
701        if (PyUnicodeTranslateError_GetEnd(exc, &end))
702            return NULL;
703    }
704    else {
705        wrong_exception_type(exc);
706        return NULL;
707    }
708    return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
709}
710
711
712PyObject *PyCodec_ReplaceErrors(PyObject *exc)
713{
714    Py_ssize_t start, end, i, len;
715
716    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
717        PyObject *res;
718        int kind;
719        void *data;
720        if (PyUnicodeEncodeError_GetStart(exc, &start))
721            return NULL;
722        if (PyUnicodeEncodeError_GetEnd(exc, &end))
723            return NULL;
724        len = end - start;
725        res = PyUnicode_New(len, '?');
726        if (res == NULL)
727            return NULL;
728        kind = PyUnicode_KIND(res);
729        data = PyUnicode_DATA(res);
730        for (i = 0; i < len; ++i)
731            PyUnicode_WRITE(kind, data, i, '?');
732        assert(_PyUnicode_CheckConsistency(res, 1));
733        return Py_BuildValue("(Nn)", res, end);
734    }
735    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
736        if (PyUnicodeDecodeError_GetEnd(exc, &end))
737            return NULL;
738        return Py_BuildValue("(Cn)",
739                             (int)Py_UNICODE_REPLACEMENT_CHARACTER,
740                             end);
741    }
742    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
743        PyObject *res;
744        int kind;
745        void *data;
746        if (PyUnicodeTranslateError_GetStart(exc, &start))
747            return NULL;
748        if (PyUnicodeTranslateError_GetEnd(exc, &end))
749            return NULL;
750        len = end - start;
751        res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
752        if (res == NULL)
753            return NULL;
754        kind = PyUnicode_KIND(res);
755        data = PyUnicode_DATA(res);
756        for (i=0; i < len; i++)
757            PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
758        assert(_PyUnicode_CheckConsistency(res, 1));
759        return Py_BuildValue("(Nn)", res, end);
760    }
761    else {
762        wrong_exception_type(exc);
763        return NULL;
764    }
765}
766
767PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
768{
769    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
770        PyObject *restuple;
771        PyObject *object;
772        Py_ssize_t i;
773        Py_ssize_t start;
774        Py_ssize_t end;
775        PyObject *res;
776        unsigned char *outp;
777        Py_ssize_t ressize;
778        Py_UCS4 ch;
779        if (PyUnicodeEncodeError_GetStart(exc, &start))
780            return NULL;
781        if (PyUnicodeEncodeError_GetEnd(exc, &end))
782            return NULL;
783        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
784            return NULL;
785        if (end - start > PY_SSIZE_T_MAX / (2+7+1))
786            end = start + PY_SSIZE_T_MAX / (2+7+1);
787        for (i = start, ressize = 0; i < end; ++i) {
788            /* object is guaranteed to be "ready" */
789            ch = PyUnicode_READ_CHAR(object, i);
790            if (ch<10)
791                ressize += 2+1+1;
792            else if (ch<100)
793                ressize += 2+2+1;
794            else if (ch<1000)
795                ressize += 2+3+1;
796            else if (ch<10000)
797                ressize += 2+4+1;
798            else if (ch<100000)
799                ressize += 2+5+1;
800            else if (ch<1000000)
801                ressize += 2+6+1;
802            else
803                ressize += 2+7+1;
804        }
805        /* allocate replacement */
806        res = PyUnicode_New(ressize, 127);
807        if (res == NULL) {
808            Py_DECREF(object);
809            return NULL;
810        }
811        outp = PyUnicode_1BYTE_DATA(res);
812        /* generate replacement */
813        for (i = start; i < end; ++i) {
814            int digits;
815            int base;
816            ch = PyUnicode_READ_CHAR(object, i);
817            *outp++ = '&';
818            *outp++ = '#';
819            if (ch<10) {
820                digits = 1;
821                base = 1;
822            }
823            else if (ch<100) {
824                digits = 2;
825                base = 10;
826            }
827            else if (ch<1000) {
828                digits = 3;
829                base = 100;
830            }
831            else if (ch<10000) {
832                digits = 4;
833                base = 1000;
834            }
835            else if (ch<100000) {
836                digits = 5;
837                base = 10000;
838            }
839            else if (ch<1000000) {
840                digits = 6;
841                base = 100000;
842            }
843            else {
844                digits = 7;
845                base = 1000000;
846            }
847            while (digits-->0) {
848                *outp++ = '0' + ch/base;
849                ch %= base;
850                base /= 10;
851            }
852            *outp++ = ';';
853        }
854        assert(_PyUnicode_CheckConsistency(res, 1));
855        restuple = Py_BuildValue("(Nn)", res, end);
856        Py_DECREF(object);
857        return restuple;
858    }
859    else {
860        wrong_exception_type(exc);
861        return NULL;
862    }
863}
864
865PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
866{
867    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
868        PyObject *restuple;
869        PyObject *object;
870        Py_ssize_t i;
871        Py_ssize_t start;
872        Py_ssize_t end;
873        PyObject *res;
874        unsigned char *outp;
875        Py_ssize_t ressize;
876        Py_UCS4 c;
877        if (PyUnicodeEncodeError_GetStart(exc, &start))
878            return NULL;
879        if (PyUnicodeEncodeError_GetEnd(exc, &end))
880            return NULL;
881        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
882            return NULL;
883        if (end - start > PY_SSIZE_T_MAX / (1+1+8))
884            end = start + PY_SSIZE_T_MAX / (1+1+8);
885        for (i = start, ressize = 0; i < end; ++i) {
886            /* object is guaranteed to be "ready" */
887            c = PyUnicode_READ_CHAR(object, i);
888            if (c >= 0x10000) {
889                ressize += 1+1+8;
890            }
891            else if (c >= 0x100) {
892                ressize += 1+1+4;
893            }
894            else
895                ressize += 1+1+2;
896        }
897        res = PyUnicode_New(ressize, 127);
898        if (res == NULL) {
899            Py_DECREF(object);
900            return NULL;
901        }
902        for (i = start, outp = PyUnicode_1BYTE_DATA(res);
903            i < end; ++i) {
904            c = PyUnicode_READ_CHAR(object, i);
905            *outp++ = '\\';
906            if (c >= 0x00010000) {
907                *outp++ = 'U';
908                *outp++ = Py_hexdigits[(c>>28)&0xf];
909                *outp++ = Py_hexdigits[(c>>24)&0xf];
910                *outp++ = Py_hexdigits[(c>>20)&0xf];
911                *outp++ = Py_hexdigits[(c>>16)&0xf];
912                *outp++ = Py_hexdigits[(c>>12)&0xf];
913                *outp++ = Py_hexdigits[(c>>8)&0xf];
914            }
915            else if (c >= 0x100) {
916                *outp++ = 'u';
917                *outp++ = Py_hexdigits[(c>>12)&0xf];
918                *outp++ = Py_hexdigits[(c>>8)&0xf];
919            }
920            else
921                *outp++ = 'x';
922            *outp++ = Py_hexdigits[(c>>4)&0xf];
923            *outp++ = Py_hexdigits[c&0xf];
924        }
925
926        assert(_PyUnicode_CheckConsistency(res, 1));
927        restuple = Py_BuildValue("(Nn)", res, end);
928        Py_DECREF(object);
929        return restuple;
930    }
931    else {
932        wrong_exception_type(exc);
933        return NULL;
934    }
935}
936
937static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
938static int ucnhash_initialized = 0;
939
940PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
941{
942    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
943        PyObject *restuple;
944        PyObject *object;
945        Py_ssize_t i;
946        Py_ssize_t start;
947        Py_ssize_t end;
948        PyObject *res;
949        unsigned char *outp;
950        Py_ssize_t ressize;
951        int replsize;
952        Py_UCS4 c;
953        char buffer[256]; /* NAME_MAXLEN */
954        if (PyUnicodeEncodeError_GetStart(exc, &start))
955            return NULL;
956        if (PyUnicodeEncodeError_GetEnd(exc, &end))
957            return NULL;
958        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
959            return NULL;
960        if (!ucnhash_initialized) {
961            /* load the unicode data module */
962            ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
963                                            PyUnicodeData_CAPSULE_NAME, 1);
964            ucnhash_initialized = 1;
965        }
966        for (i = start, ressize = 0; i < end; ++i) {
967            /* object is guaranteed to be "ready" */
968            c = PyUnicode_READ_CHAR(object, i);
969            if (ucnhash_CAPI &&
970                ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
971                replsize = 1+1+1+strlen(buffer)+1;
972            }
973            else if (c >= 0x10000) {
974                replsize = 1+1+8;
975            }
976            else if (c >= 0x100) {
977                replsize = 1+1+4;
978            }
979            else
980                replsize = 1+1+2;
981            if (ressize > PY_SSIZE_T_MAX - replsize)
982                break;
983            ressize += replsize;
984        }
985        end = i;
986        res = PyUnicode_New(ressize, 127);
987        if (res==NULL)
988            return NULL;
989        for (i = start, outp = PyUnicode_1BYTE_DATA(res);
990            i < end; ++i) {
991            c = PyUnicode_READ_CHAR(object, i);
992            *outp++ = '\\';
993            if (ucnhash_CAPI &&
994                ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) {
995                *outp++ = 'N';
996                *outp++ = '{';
997                strcpy((char *)outp, buffer);
998                outp += strlen(buffer);
999                *outp++ = '}';
1000                continue;
1001            }
1002            if (c >= 0x00010000) {
1003                *outp++ = 'U';
1004                *outp++ = Py_hexdigits[(c>>28)&0xf];
1005                *outp++ = Py_hexdigits[(c>>24)&0xf];
1006                *outp++ = Py_hexdigits[(c>>20)&0xf];
1007                *outp++ = Py_hexdigits[(c>>16)&0xf];
1008                *outp++ = Py_hexdigits[(c>>12)&0xf];
1009                *outp++ = Py_hexdigits[(c>>8)&0xf];
1010            }
1011            else if (c >= 0x100) {
1012                *outp++ = 'u';
1013                *outp++ = Py_hexdigits[(c>>12)&0xf];
1014                *outp++ = Py_hexdigits[(c>>8)&0xf];
1015            }
1016            else
1017                *outp++ = 'x';
1018            *outp++ = Py_hexdigits[(c>>4)&0xf];
1019            *outp++ = Py_hexdigits[c&0xf];
1020        }
1021
1022        assert(out == start + ressize);
1023        assert(_PyUnicode_CheckConsistency(res, 1));
1024        restuple = Py_BuildValue("(Nn)", res, end);
1025        Py_DECREF(object);
1026        return restuple;
1027    }
1028    else {
1029        wrong_exception_type(exc);
1030        return NULL;
1031    }
1032}
1033
/* Identifiers returned by get_standard_encoding(). */
#define ENC_UNKNOWN     (-1)
#define ENC_UTF8        0
#define ENC_UTF16BE     1
#define ENC_UTF16LE     2
#define ENC_UTF32BE     3
#define ENC_UTF32LE     4

/* Map an encoding name onto one of the ENC_* identifiers.

   Recognised spellings (the "utf" prefix and the "be"/"le" suffix are
   matched case-insensitively, '-' and '_' are interchangeable and
   optional):

     utf-8                      -> ENC_UTF8,  *bytelength = 3
     utf-16, utf-16-be/-le      -> ENC_UTF16*, *bytelength = 2
     utf-32, utf-32-be/-le      -> ENC_UTF32*, *bytelength = 4
     CP_UTF8 (exact match only) -> ENC_UTF8,  *bytelength = 3

   Without a byte-order suffix, UTF-16/UTF-32 resolve to the byte order
   selected by WORDS_BIGENDIAN.

   Returns ENC_UNKNOWN for anything else.  Note that *bytelength may
   already have been written in that case (e.g. for "utf-16xx"), so
   callers must check the return value before using it.

   encoding must be a NUL-terminated string. */
static int
get_standard_encoding(const char *encoding, int *bytelength)
{
    if (Py_TOLOWER(encoding[0]) == 'u' &&
        Py_TOLOWER(encoding[1]) == 't' &&
        Py_TOLOWER(encoding[2]) == 'f') {
        encoding += 3;
        if (*encoding == '-' || *encoding == '_' )
            encoding++;
        if (encoding[0] == '8' && encoding[1] == '\0') {
            *bytelength = 3;
            return ENC_UTF8;
        }
        else if (encoding[0] == '1' && encoding[1] == '6') {
            encoding += 2;
            *bytelength = 2;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF16BE;
#else
                return ENC_UTF16LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* encoding[0] may be the terminating NUL here (name ended in
               a separator, e.g. "utf-16-"); test it first so that
               encoding[1] is never read past the end of the string. */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF16BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF16LE;
            }
        }
        else if (encoding[0] == '3' && encoding[1] == '2') {
            encoding += 2;
            *bytelength = 4;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF32BE;
#else
                return ENC_UTF32LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* Same guard as for UTF-16: do not look beyond the NUL. */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF32BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF32LE;
            }
        }
    }
    else if (strcmp(encoding, "CP_UTF8") == 0) {
        *bytelength = 3;
        return ENC_UTF8;
    }
    return ENC_UNKNOWN;
}
1099
1100/* This handler is declared static until someone demonstrates
1101   a need to call it directly. */
1102static PyObject *
1103PyCodec_SurrogatePassErrors(PyObject *exc)
1104{
1105    PyObject *restuple;
1106    PyObject *object;
1107    PyObject *encode;
1108    char *encoding;
1109    int code;
1110    int bytelength;
1111    Py_ssize_t i;
1112    Py_ssize_t start;
1113    Py_ssize_t end;
1114    PyObject *res;
1115    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
1116        unsigned char *outp;
1117        if (PyUnicodeEncodeError_GetStart(exc, &start))
1118            return NULL;
1119        if (PyUnicodeEncodeError_GetEnd(exc, &end))
1120            return NULL;
1121        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
1122            return NULL;
1123        if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) {
1124            Py_DECREF(object);
1125            return NULL;
1126        }
1127        if (!(encoding = PyUnicode_AsUTF8(encode))) {
1128            Py_DECREF(object);
1129            Py_DECREF(encode);
1130            return NULL;
1131        }
1132        code = get_standard_encoding(encoding, &bytelength);
1133        Py_DECREF(encode);
1134        if (code == ENC_UNKNOWN) {
1135            /* Not supported, fail with original exception */
1136            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1137            Py_DECREF(object);
1138            return NULL;
1139        }
1140
1141        if (end - start > PY_SSIZE_T_MAX / bytelength)
1142            end = start + PY_SSIZE_T_MAX / bytelength;
1143        res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
1144        if (!res) {
1145            Py_DECREF(object);
1146            return NULL;
1147        }
1148        outp = (unsigned char*)PyBytes_AsString(res);
1149        for (i = start; i < end; i++) {
1150            /* object is guaranteed to be "ready" */
1151            Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
1152            if (!Py_UNICODE_IS_SURROGATE(ch)) {
1153                /* Not a surrogate, fail with original exception */
1154                PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1155                Py_DECREF(res);
1156                Py_DECREF(object);
1157                return NULL;
1158            }
1159            switch (code) {
1160            case ENC_UTF8:
1161                *outp++ = (unsigned char)(0xe0 | (ch >> 12));
1162                *outp++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f));
1163                *outp++ = (unsigned char)(0x80 | (ch & 0x3f));
1164                break;
1165            case ENC_UTF16LE:
1166                *outp++ = (unsigned char) ch;
1167                *outp++ = (unsigned char)(ch >> 8);
1168                break;
1169            case ENC_UTF16BE:
1170                *outp++ = (unsigned char)(ch >> 8);
1171                *outp++ = (unsigned char) ch;
1172                break;
1173            case ENC_UTF32LE:
1174                *outp++ = (unsigned char) ch;
1175                *outp++ = (unsigned char)(ch >> 8);
1176                *outp++ = (unsigned char)(ch >> 16);
1177                *outp++ = (unsigned char)(ch >> 24);
1178                break;
1179            case ENC_UTF32BE:
1180                *outp++ = (unsigned char)(ch >> 24);
1181                *outp++ = (unsigned char)(ch >> 16);
1182                *outp++ = (unsigned char)(ch >> 8);
1183                *outp++ = (unsigned char) ch;
1184                break;
1185            }
1186        }
1187        restuple = Py_BuildValue("(On)", res, end);
1188        Py_DECREF(res);
1189        Py_DECREF(object);
1190        return restuple;
1191    }
1192    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
1193        unsigned char *p;
1194        Py_UCS4 ch = 0;
1195        if (PyUnicodeDecodeError_GetStart(exc, &start))
1196            return NULL;
1197        if (PyUnicodeDecodeError_GetEnd(exc, &end))
1198            return NULL;
1199        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
1200            return NULL;
1201        if (!(p = (unsigned char*)PyBytes_AsString(object))) {
1202            Py_DECREF(object);
1203            return NULL;
1204        }
1205        if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) {
1206            Py_DECREF(object);
1207            return NULL;
1208        }
1209        if (!(encoding = PyUnicode_AsUTF8(encode))) {
1210            Py_DECREF(object);
1211            Py_DECREF(encode);
1212            return NULL;
1213        }
1214        code = get_standard_encoding(encoding, &bytelength);
1215        Py_DECREF(encode);
1216        if (code == ENC_UNKNOWN) {
1217            /* Not supported, fail with original exception */
1218            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1219            Py_DECREF(object);
1220            return NULL;
1221        }
1222
1223        /* Try decoding a single surrogate character. If
1224           there are more, let the codec call us again. */
1225        p += start;
1226        if (PyBytes_GET_SIZE(object) - start >= bytelength) {
1227            switch (code) {
1228            case ENC_UTF8:
1229                if ((p[0] & 0xf0) == 0xe0 &&
1230                    (p[1] & 0xc0) == 0x80 &&
1231                    (p[2] & 0xc0) == 0x80) {
1232                    /* it's a three-byte code */
1233                    ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
1234                }
1235                break;
1236            case ENC_UTF16LE:
1237                ch = p[1] << 8 | p[0];
1238                break;
1239            case ENC_UTF16BE:
1240                ch = p[0] << 8 | p[1];
1241                break;
1242            case ENC_UTF32LE:
1243                ch = (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
1244                break;
1245            case ENC_UTF32BE:
1246                ch = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
1247                break;
1248            }
1249        }
1250
1251        Py_DECREF(object);
1252        if (!Py_UNICODE_IS_SURROGATE(ch)) {
1253            /* it's not a surrogate - fail */
1254            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1255            return NULL;
1256        }
1257        res = PyUnicode_FromOrdinal(ch);
1258        if (res == NULL)
1259            return NULL;
1260        return Py_BuildValue("(Nn)", res, start + bytelength);
1261    }
1262    else {
1263        wrong_exception_type(exc);
1264        return NULL;
1265    }
1266}
1267
1268static PyObject *
1269PyCodec_SurrogateEscapeErrors(PyObject *exc)
1270{
1271    PyObject *restuple;
1272    PyObject *object;
1273    Py_ssize_t i;
1274    Py_ssize_t start;
1275    Py_ssize_t end;
1276    PyObject *res;
1277    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
1278        char *outp;
1279        if (PyUnicodeEncodeError_GetStart(exc, &start))
1280            return NULL;
1281        if (PyUnicodeEncodeError_GetEnd(exc, &end))
1282            return NULL;
1283        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
1284            return NULL;
1285        res = PyBytes_FromStringAndSize(NULL, end-start);
1286        if (!res) {
1287            Py_DECREF(object);
1288            return NULL;
1289        }
1290        outp = PyBytes_AsString(res);
1291        for (i = start; i < end; i++) {
1292            /* object is guaranteed to be "ready" */
1293            Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
1294            if (ch < 0xdc80 || ch > 0xdcff) {
1295                /* Not a UTF-8b surrogate, fail with original exception */
1296                PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1297                Py_DECREF(res);
1298                Py_DECREF(object);
1299                return NULL;
1300            }
1301            *outp++ = ch - 0xdc00;
1302        }
1303        restuple = Py_BuildValue("(On)", res, end);
1304        Py_DECREF(res);
1305        Py_DECREF(object);
1306        return restuple;
1307    }
1308    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
1309        PyObject *str;
1310        unsigned char *p;
1311        Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
1312        int consumed = 0;
1313        if (PyUnicodeDecodeError_GetStart(exc, &start))
1314            return NULL;
1315        if (PyUnicodeDecodeError_GetEnd(exc, &end))
1316            return NULL;
1317        if (!(object = PyUnicodeDecodeError_GetObject(exc)))
1318            return NULL;
1319        if (!(p = (unsigned char*)PyBytes_AsString(object))) {
1320            Py_DECREF(object);
1321            return NULL;
1322        }
1323        while (consumed < 4 && consumed < end-start) {
1324            /* Refuse to escape ASCII bytes. */
1325            if (p[start+consumed] < 128)
1326                break;
1327            ch[consumed] = 0xdc00 + p[start+consumed];
1328            consumed++;
1329        }
1330        Py_DECREF(object);
1331        if (!consumed) {
1332            /* codec complained about ASCII byte. */
1333            PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
1334            return NULL;
1335        }
1336        str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
1337        if (str == NULL)
1338            return NULL;
1339        return Py_BuildValue("(Nn)", str, start+consumed);
1340    }
1341    else {
1342        wrong_exception_type(exc);
1343        return NULL;
1344    }
1345}
1346
1347
1348static PyObject *strict_errors(PyObject *self, PyObject *exc)
1349{
1350    return PyCodec_StrictErrors(exc);
1351}
1352
1353
1354static PyObject *ignore_errors(PyObject *self, PyObject *exc)
1355{
1356    return PyCodec_IgnoreErrors(exc);
1357}
1358
1359
1360static PyObject *replace_errors(PyObject *self, PyObject *exc)
1361{
1362    return PyCodec_ReplaceErrors(exc);
1363}
1364
1365
1366static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
1367{
1368    return PyCodec_XMLCharRefReplaceErrors(exc);
1369}
1370
1371
1372static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
1373{
1374    return PyCodec_BackslashReplaceErrors(exc);
1375}
1376
1377static PyObject *namereplace_errors(PyObject *self, PyObject *exc)
1378{
1379    return PyCodec_NameReplaceErrors(exc);
1380}
1381
1382static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
1383{
1384    return PyCodec_SurrogatePassErrors(exc);
1385}
1386
1387static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
1388{
1389    return PyCodec_SurrogateEscapeErrors(exc);
1390}
1391
1392static int _PyCodecRegistry_Init(void)
1393{
1394    static struct {
1395        char *name;
1396        PyMethodDef def;
1397    } methods[] =
1398    {
1399        {
1400            "strict",
1401            {
1402                "strict_errors",
1403                strict_errors,
1404                METH_O,
1405                PyDoc_STR("Implements the 'strict' error handling, which "
1406                          "raises a UnicodeError on coding errors.")
1407            }
1408        },
1409        {
1410            "ignore",
1411            {
1412                "ignore_errors",
1413                ignore_errors,
1414                METH_O,
1415                PyDoc_STR("Implements the 'ignore' error handling, which "
1416                          "ignores malformed data and continues.")
1417            }
1418        },
1419        {
1420            "replace",
1421            {
1422                "replace_errors",
1423                replace_errors,
1424                METH_O,
1425                PyDoc_STR("Implements the 'replace' error handling, which "
1426                          "replaces malformed data with a replacement marker.")
1427            }
1428        },
1429        {
1430            "xmlcharrefreplace",
1431            {
1432                "xmlcharrefreplace_errors",
1433                xmlcharrefreplace_errors,
1434                METH_O,
1435                PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
1436                          "which replaces an unencodable character with the "
1437                          "appropriate XML character reference.")
1438            }
1439        },
1440        {
1441            "backslashreplace",
1442            {
1443                "backslashreplace_errors",
1444                backslashreplace_errors,
1445                METH_O,
1446                PyDoc_STR("Implements the 'backslashreplace' error handling, "
1447                          "which replaces an unencodable character with a "
1448                          "backslashed escape sequence.")
1449            }
1450        },
1451        {
1452            "namereplace",
1453            {
1454                "namereplace_errors",
1455                namereplace_errors,
1456                METH_O,
1457                PyDoc_STR("Implements the 'namereplace' error handling, "
1458                          "which replaces an unencodable character with a "
1459                          "\\N{...} escape sequence.")
1460            }
1461        },
1462        {
1463            "surrogatepass",
1464            {
1465                "surrogatepass",
1466                surrogatepass_errors,
1467                METH_O
1468            }
1469        },
1470        {
1471            "surrogateescape",
1472            {
1473                "surrogateescape",
1474                surrogateescape_errors,
1475                METH_O
1476            }
1477        }
1478    };
1479
1480    PyInterpreterState *interp = PyThreadState_GET()->interp;
1481    PyObject *mod;
1482    unsigned i;
1483
1484    if (interp->codec_search_path != NULL)
1485        return 0;
1486
1487    interp->codec_search_path = PyList_New(0);
1488    interp->codec_search_cache = PyDict_New();
1489    interp->codec_error_registry = PyDict_New();
1490
1491    if (interp->codec_error_registry) {
1492        for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
1493            PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
1494            int res;
1495            if (!func)
1496                Py_FatalError("can't initialize codec error registry");
1497            res = PyCodec_RegisterError(methods[i].name, func);
1498            Py_DECREF(func);
1499            if (res)
1500                Py_FatalError("can't initialize codec error registry");
1501        }
1502    }
1503
1504    if (interp->codec_search_path == NULL ||
1505        interp->codec_search_cache == NULL ||
1506        interp->codec_error_registry == NULL)
1507        Py_FatalError("can't initialize codec registry");
1508
1509    mod = PyImport_ImportModuleNoBlock("encodings");
1510    if (mod == NULL) {
1511        return -1;
1512    }
1513    Py_DECREF(mod);
1514    interp->codecs_initialized = 1;
1515    return 0;
1516}
1517