1/* ------------------------------------------------------------------------
2
3   Python Codec Registry and support functions
4
5Written by Marc-Andre Lemburg (mal@lemburg.com).
6
7Copyright (c) Corporation for National Research Initiatives.
8
9   ------------------------------------------------------------------------ */
10
11#include "Python.h"
12#include <ctype.h>
13
14/* --- Codec Registry ----------------------------------------------------- */
15
16/* Import the standard encodings package which will register the first
17   codec search function.
18
   This is done lazily so that the Unicode implementation does not
   slow down the startup of scripts that do not need it.
21
22   ImportErrors are silently ignored by this function. Only one try is
23   made.
24
25*/
26
27static int _PyCodecRegistry_Init(void); /* Forward */
28
29int PyCodec_Register(PyObject *search_function)
30{
31    PyInterpreterState *interp = PyThreadState_GET()->interp;
32    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
33        goto onError;
34    if (search_function == NULL) {
35        PyErr_BadArgument();
36        goto onError;
37    }
38    if (!PyCallable_Check(search_function)) {
39        PyErr_SetString(PyExc_TypeError, "argument must be callable");
40        goto onError;
41    }
42    return PyList_Append(interp->codec_search_path, search_function);
43
44 onError:
45    return -1;
46}
47
48/* Convert a string to a normalized Python string: all characters are
49   converted to lower case, spaces are replaced with underscores. */
50
51static
52PyObject *normalizestring(const char *string)
53{
54    register size_t i;
55    size_t len = strlen(string);
56    char *p;
57    PyObject *v;
58
59    if (len > PY_SSIZE_T_MAX) {
60        PyErr_SetString(PyExc_OverflowError, "string is too large");
61        return NULL;
62    }
63
64    v = PyString_FromStringAndSize(NULL, len);
65    if (v == NULL)
66        return NULL;
67    p = PyString_AS_STRING(v);
68    for (i = 0; i < len; i++) {
69        register char ch = string[i];
70        if (ch == ' ')
71            ch = '-';
72        else
73            ch = Py_TOLOWER(Py_CHARMASK(ch));
74        p[i] = ch;
75    }
76    return v;
77}
78
79/* Lookup the given encoding and return a tuple providing the codec
80   facilities.
81
82   The encoding string is looked up converted to all lower-case
83   characters. This makes encodings looked up through this mechanism
84   effectively case-insensitive.
85
86   If no codec is found, a LookupError is set and NULL returned.
87
88   As side effect, this tries to load the encodings package, if not
89   yet done. This is part of the lazy load strategy for the encodings
90   package.
91
92*/
93
94PyObject *_PyCodec_Lookup(const char *encoding)
95{
96    PyInterpreterState *interp;
97    PyObject *result, *args = NULL, *v;
98    Py_ssize_t i, len;
99
100    if (encoding == NULL) {
101        PyErr_BadArgument();
102        goto onError;
103    }
104
105    interp = PyThreadState_GET()->interp;
106    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
107        goto onError;
108
109    /* Convert the encoding to a normalized Python string: all
110       characters are converted to lower case, spaces and hyphens are
111       replaced with underscores. */
112    v = normalizestring(encoding);
113    if (v == NULL)
114        goto onError;
115    PyString_InternInPlace(&v);
116
117    /* First, try to lookup the name in the registry dictionary */
118    result = PyDict_GetItem(interp->codec_search_cache, v);
119    if (result != NULL) {
120        Py_INCREF(result);
121        Py_DECREF(v);
122        return result;
123    }
124
125    /* Next, scan the search functions in order of registration */
126    args = PyTuple_New(1);
127    if (args == NULL)
128        goto onError;
129    PyTuple_SET_ITEM(args,0,v);
130
131    len = PyList_Size(interp->codec_search_path);
132    if (len < 0)
133        goto onError;
134    if (len == 0) {
135        PyErr_SetString(PyExc_LookupError,
136                        "no codec search functions registered: "
137                        "can't find encoding");
138        goto onError;
139    }
140
141    for (i = 0; i < len; i++) {
142        PyObject *func;
143
144        func = PyList_GetItem(interp->codec_search_path, i);
145        if (func == NULL)
146            goto onError;
147        result = PyEval_CallObject(func, args);
148        if (result == NULL)
149            goto onError;
150        if (result == Py_None) {
151            Py_DECREF(result);
152            continue;
153        }
154        if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
155            PyErr_SetString(PyExc_TypeError,
156                            "codec search functions must return 4-tuples");
157            Py_DECREF(result);
158            goto onError;
159        }
160        break;
161    }
162    if (i == len) {
163        /* XXX Perhaps we should cache misses too ? */
164        PyErr_Format(PyExc_LookupError,
165                     "unknown encoding: %s", encoding);
166        goto onError;
167    }
168
169    /* Cache and return the result */
170    PyDict_SetItem(interp->codec_search_cache, v, result);
171    Py_DECREF(args);
172    return result;
173
174 onError:
175    Py_XDECREF(args);
176    return NULL;
177}
178
179static
180PyObject *args_tuple(PyObject *object,
181                     const char *errors)
182{
183    PyObject *args;
184
185    args = PyTuple_New(1 + (errors != NULL));
186    if (args == NULL)
187        return NULL;
188    Py_INCREF(object);
189    PyTuple_SET_ITEM(args,0,object);
190    if (errors) {
191        PyObject *v;
192
193        v = PyString_FromString(errors);
194        if (v == NULL) {
195            Py_DECREF(args);
196            return NULL;
197        }
198        PyTuple_SET_ITEM(args, 1, v);
199    }
200    return args;
201}
202
203/* Helper function to get a codec item */
204
205static
206PyObject *codec_getitem(const char *encoding, int index)
207{
208    PyObject *codecs;
209    PyObject *v;
210
211    codecs = _PyCodec_Lookup(encoding);
212    if (codecs == NULL)
213        return NULL;
214    v = PyTuple_GET_ITEM(codecs, index);
215    Py_DECREF(codecs);
216    Py_INCREF(v);
217    return v;
218}
219
220/* Helper function to create an incremental codec. */
221
222static
223PyObject *codec_getincrementalcodec(const char *encoding,
224                                    const char *errors,
225                                    const char *attrname)
226{
227    PyObject *codecs, *ret, *inccodec;
228
229    codecs = _PyCodec_Lookup(encoding);
230    if (codecs == NULL)
231        return NULL;
232    inccodec = PyObject_GetAttrString(codecs, attrname);
233    Py_DECREF(codecs);
234    if (inccodec == NULL)
235        return NULL;
236    if (errors)
237        ret = PyObject_CallFunction(inccodec, "s", errors);
238    else
239        ret = PyObject_CallFunction(inccodec, NULL);
240    Py_DECREF(inccodec);
241    return ret;
242}
243
244/* Helper function to create a stream codec. */
245
246static
247PyObject *codec_getstreamcodec(const char *encoding,
248                               PyObject *stream,
249                               const char *errors,
250                               const int index)
251{
252    PyObject *codecs, *streamcodec, *codeccls;
253
254    codecs = _PyCodec_Lookup(encoding);
255    if (codecs == NULL)
256        return NULL;
257
258    codeccls = PyTuple_GET_ITEM(codecs, index);
259    if (errors != NULL)
260        streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
261    else
262        streamcodec = PyObject_CallFunction(codeccls, "O", stream);
263    Py_DECREF(codecs);
264    return streamcodec;
265}
266
267/* Convenience APIs to query the Codec registry.
268
269   All APIs return a codec object with incremented refcount.
270
271 */
272
/* Return entry 0 of the codec tuple registered for encoding (the
   encoder), or NULL if the lookup fails. */
PyObject *PyCodec_Encoder(const char *encoding)
{
    return codec_getitem(encoding, 0);
}
277
/* Return entry 1 of the codec tuple registered for encoding (the
   decoder), or NULL if the lookup fails. */
PyObject *PyCodec_Decoder(const char *encoding)
{
    return codec_getitem(encoding, 1);
}
282
/* Create an incremental encoder for encoding by calling the codec's
   "incrementalencoder" attribute; errors is passed to it if non-NULL.
   Returns NULL on failure. */
PyObject *PyCodec_IncrementalEncoder(const char *encoding,
                                     const char *errors)
{
    return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
}
288
/* Create an incremental decoder for encoding by calling the codec's
   "incrementaldecoder" attribute; errors is passed to it if non-NULL.
   Returns NULL on failure. */
PyObject *PyCodec_IncrementalDecoder(const char *encoding,
                                     const char *errors)
{
    return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
}
294
/* Create a stream reader for encoding by calling entry 2 of the codec
   tuple with stream and, if non-NULL, errors. Returns NULL on
   failure. */
PyObject *PyCodec_StreamReader(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    return codec_getstreamcodec(encoding, stream, errors, 2);
}
301
/* Create a stream writer for encoding by calling entry 3 of the codec
   tuple with stream and, if non-NULL, errors. Returns NULL on
   failure. */
PyObject *PyCodec_StreamWriter(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    return codec_getstreamcodec(encoding, stream, errors, 3);
}
308
309/* Encode an object (e.g. an Unicode object) using the given encoding
310   and return the resulting encoded object (usually a Python string).
311
312   errors is passed to the encoder factory as argument if non-NULL. */
313
314PyObject *PyCodec_Encode(PyObject *object,
315                         const char *encoding,
316                         const char *errors)
317{
318    PyObject *encoder = NULL;
319    PyObject *args = NULL, *result = NULL;
320    PyObject *v;
321
322    encoder = PyCodec_Encoder(encoding);
323    if (encoder == NULL)
324        goto onError;
325
326    args = args_tuple(object, errors);
327    if (args == NULL)
328        goto onError;
329
330    result = PyEval_CallObject(encoder,args);
331    if (result == NULL)
332        goto onError;
333
334    if (!PyTuple_Check(result) ||
335        PyTuple_GET_SIZE(result) != 2) {
336        PyErr_SetString(PyExc_TypeError,
337                        "encoder must return a tuple (object,integer)");
338        goto onError;
339    }
340    v = PyTuple_GET_ITEM(result,0);
341    Py_INCREF(v);
342    /* We don't check or use the second (integer) entry. */
343
344    Py_DECREF(args);
345    Py_DECREF(encoder);
346    Py_DECREF(result);
347    return v;
348
349 onError:
350    Py_XDECREF(result);
351    Py_XDECREF(args);
352    Py_XDECREF(encoder);
353    return NULL;
354}
355
356/* Decode an object (usually a Python string) using the given encoding
357   and return an equivalent object (e.g. an Unicode object).
358
359   errors is passed to the decoder factory as argument if non-NULL. */
360
361PyObject *PyCodec_Decode(PyObject *object,
362                         const char *encoding,
363                         const char *errors)
364{
365    PyObject *decoder = NULL;
366    PyObject *args = NULL, *result = NULL;
367    PyObject *v;
368
369    decoder = PyCodec_Decoder(encoding);
370    if (decoder == NULL)
371        goto onError;
372
373    args = args_tuple(object, errors);
374    if (args == NULL)
375        goto onError;
376
377    result = PyEval_CallObject(decoder,args);
378    if (result == NULL)
379        goto onError;
380    if (!PyTuple_Check(result) ||
381        PyTuple_GET_SIZE(result) != 2) {
382        PyErr_SetString(PyExc_TypeError,
383                        "decoder must return a tuple (object,integer)");
384        goto onError;
385    }
386    v = PyTuple_GET_ITEM(result,0);
387    Py_INCREF(v);
388    /* We don't check or use the second (integer) entry. */
389
390    Py_DECREF(args);
391    Py_DECREF(decoder);
392    Py_DECREF(result);
393    return v;
394
395 onError:
396    Py_XDECREF(args);
397    Py_XDECREF(decoder);
398    Py_XDECREF(result);
399    return NULL;
400}
401
402/* Register the error handling callback function error under the name
403   name. This function will be called by the codec when it encounters
404   an unencodable characters/undecodable bytes and doesn't know the
405   callback name, when name is specified as the error parameter
406   in the call to the encode/decode function.
407   Return 0 on success, -1 on error */
408int PyCodec_RegisterError(const char *name, PyObject *error)
409{
410    PyInterpreterState *interp = PyThreadState_GET()->interp;
411    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
412        return -1;
413    if (!PyCallable_Check(error)) {
414        PyErr_SetString(PyExc_TypeError, "handler must be callable");
415        return -1;
416    }
417    return PyDict_SetItemString(interp->codec_error_registry,
418                                (char *)name, error);
419}
420
421/* Lookup the error handling callback function registered under the
422   name error. As a special case NULL can be passed, in which case
423   the error handling callback for strict encoding will be returned. */
424PyObject *PyCodec_LookupError(const char *name)
425{
426    PyObject *handler = NULL;
427
428    PyInterpreterState *interp = PyThreadState_GET()->interp;
429    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
430        return NULL;
431
432    if (name==NULL)
433        name = "strict";
434    handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
435    if (!handler)
436        PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
437    else
438        Py_INCREF(handler);
439    return handler;
440}
441
442static void wrong_exception_type(PyObject *exc)
443{
444    PyObject *type = PyObject_GetAttrString(exc, "__class__");
445    if (type != NULL) {
446        PyObject *name = PyObject_GetAttrString(type, "__name__");
447        Py_DECREF(type);
448        if (name != NULL) {
449            PyObject *string = PyObject_Str(name);
450            Py_DECREF(name);
451            if (string != NULL) {
452                PyErr_Format(PyExc_TypeError,
453                    "don't know how to handle %.400s in error callback",
454                    PyString_AS_STRING(string));
455                Py_DECREF(string);
456            }
457        }
458    }
459}
460
461PyObject *PyCodec_StrictErrors(PyObject *exc)
462{
463    if (PyExceptionInstance_Check(exc))
464        PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
465    else
466        PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
467    return NULL;
468}
469
470
471#ifdef Py_USING_UNICODE
472PyObject *PyCodec_IgnoreErrors(PyObject *exc)
473{
474    Py_ssize_t end;
475    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
476        if (PyUnicodeEncodeError_GetEnd(exc, &end))
477            return NULL;
478    }
479    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
480        if (PyUnicodeDecodeError_GetEnd(exc, &end))
481            return NULL;
482    }
483    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
484        if (PyUnicodeTranslateError_GetEnd(exc, &end))
485            return NULL;
486    }
487    else {
488        wrong_exception_type(exc);
489        return NULL;
490    }
491    /* ouch: passing NULL, 0, pos gives None instead of u'' */
492    return Py_BuildValue("(u#n)", &end, 0, end);
493}
494
495
496PyObject *PyCodec_ReplaceErrors(PyObject *exc)
497{
498    PyObject *restuple;
499    Py_ssize_t start;
500    Py_ssize_t end;
501    Py_ssize_t i;
502
503    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
504        PyObject *res;
505        Py_UNICODE *p;
506        if (PyUnicodeEncodeError_GetStart(exc, &start))
507            return NULL;
508        if (PyUnicodeEncodeError_GetEnd(exc, &end))
509            return NULL;
510        res = PyUnicode_FromUnicode(NULL, end-start);
511        if (res == NULL)
512            return NULL;
513        for (p = PyUnicode_AS_UNICODE(res), i = start;
514            i<end; ++p, ++i)
515            *p = '?';
516        restuple = Py_BuildValue("(On)", res, end);
517        Py_DECREF(res);
518        return restuple;
519    }
520    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
521        Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
522        if (PyUnicodeDecodeError_GetEnd(exc, &end))
523            return NULL;
524        return Py_BuildValue("(u#n)", &res, (Py_ssize_t)1, end);
525    }
526    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
527        PyObject *res;
528        Py_UNICODE *p;
529        if (PyUnicodeTranslateError_GetStart(exc, &start))
530            return NULL;
531        if (PyUnicodeTranslateError_GetEnd(exc, &end))
532            return NULL;
533        res = PyUnicode_FromUnicode(NULL, end-start);
534        if (res == NULL)
535            return NULL;
536        for (p = PyUnicode_AS_UNICODE(res), i = start;
537            i<end; ++p, ++i)
538            *p = Py_UNICODE_REPLACEMENT_CHARACTER;
539        restuple = Py_BuildValue("(On)", res, end);
540        Py_DECREF(res);
541        return restuple;
542    }
543    else {
544        wrong_exception_type(exc);
545        return NULL;
546    }
547}
548
549PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
550{
551    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
552        PyObject *restuple;
553        PyObject *object;
554        Py_ssize_t start;
555        Py_ssize_t end;
556        PyObject *res;
557        Py_UNICODE *p;
558        Py_UNICODE *startp;
559        Py_UNICODE *e;
560        Py_UNICODE *outp;
561        Py_ssize_t ressize;
562        if (PyUnicodeEncodeError_GetStart(exc, &start))
563            return NULL;
564        if (PyUnicodeEncodeError_GetEnd(exc, &end))
565            return NULL;
566        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
567            return NULL;
568        startp = PyUnicode_AS_UNICODE(object);
569        if (end - start > PY_SSIZE_T_MAX / (2+7+1)) {
570            end = start + PY_SSIZE_T_MAX / (2+7+1);
571#ifndef Py_UNICODE_WIDE
572            if (0xD800 <= startp[end - 1] && startp[end - 1] <= 0xDBFF)
573                end--;
574#endif
575        }
576        e = startp + end;
577        for (p = startp+start, ressize = 0; p < e;) {
578            Py_UCS4 ch = *p++;
579#ifndef Py_UNICODE_WIDE
580            if ((0xD800 <= ch && ch <= 0xDBFF) &&
581                (p < e) &&
582                (0xDC00 <= *p && *p <= 0xDFFF)) {
583                ch = ((((ch & 0x03FF) << 10) |
584                       ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
585            }
586#endif
587            if (ch < 10)
588                ressize += 2+1+1;
589            else if (ch < 100)
590                ressize += 2+2+1;
591            else if (ch < 1000)
592                ressize += 2+3+1;
593            else if (ch < 10000)
594                ressize += 2+4+1;
595            else if (ch < 100000)
596                ressize += 2+5+1;
597            else if (ch < 1000000)
598                ressize += 2+6+1;
599            else
600                ressize += 2+7+1;
601        }
602        /* allocate replacement */
603        res = PyUnicode_FromUnicode(NULL, ressize);
604        if (res == NULL) {
605            Py_DECREF(object);
606            return NULL;
607        }
608        /* generate replacement */
609        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < e;) {
610            int digits;
611            int base;
612            Py_UCS4 ch = *p++;
613#ifndef Py_UNICODE_WIDE
614            if ((0xD800 <= ch && ch <= 0xDBFF) &&
615                (p < startp+end) &&
616                (0xDC00 <= *p && *p <= 0xDFFF)) {
617                ch = ((((ch & 0x03FF) << 10) |
618                       ((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
619            }
620#endif
621            *outp++ = '&';
622            *outp++ = '#';
623            if (ch < 10) {
624                digits = 1;
625                base = 1;
626            }
627            else if (ch < 100) {
628                digits = 2;
629                base = 10;
630            }
631            else if (ch < 1000) {
632                digits = 3;
633                base = 100;
634            }
635            else if (ch < 10000) {
636                digits = 4;
637                base = 1000;
638            }
639            else if (ch < 100000) {
640                digits = 5;
641                base = 10000;
642            }
643            else if (ch < 1000000) {
644                digits = 6;
645                base = 100000;
646            }
647            else {
648                digits = 7;
649                base = 1000000;
650            }
651            while (digits-->0) {
652                *outp++ = '0' + ch/base;
653                ch %= base;
654                base /= 10;
655            }
656            *outp++ = ';';
657        }
658        restuple = Py_BuildValue("(On)", res, end);
659        Py_DECREF(res);
660        Py_DECREF(object);
661        return restuple;
662    }
663    else {
664        wrong_exception_type(exc);
665        return NULL;
666    }
667}
668
/* Lower-case hexadecimal digits, indexed by nibble value (0-15). Used
   by PyCodec_BackslashReplaceErrors() to format escape sequences. */
static Py_UNICODE hexdigits[] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
673
/* 'backslashreplace' error callback. For a UnicodeEncodeError, returns
   (replacement, end) where replacement holds one backslash escape per
   code unit in [start, end) of the object being encoded:

     \xNN        for values below 0x100
     \uNNNN      for values from 0x100 upwards
     \UNNNNNNNN  for values from 0x10000 upwards (only on builds with
                 Py_UNICODE_WIDE)

   For any other exc a TypeError is set via wrong_exception_type() and
   NULL returned. */
PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
{
    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
        PyObject *restuple;
        PyObject *object;
        Py_ssize_t start;
        Py_ssize_t end;
        PyObject *res;
        Py_UNICODE *p;
        Py_UNICODE *startp;
        Py_UNICODE *outp;
        Py_ssize_t ressize;
        if (PyUnicodeEncodeError_GetStart(exc, &start))
            return NULL;
        if (PyUnicodeEncodeError_GetEnd(exc, &end))
            return NULL;
        if (!(object = PyUnicodeEncodeError_GetObject(exc)))
            return NULL;
        /* Each input position yields at most 1+1+8 output characters
           (backslash, type letter, hex digits). Shorten the range so
           that ressize below cannot exceed PY_SSIZE_T_MAX. */
        if (end - start > PY_SSIZE_T_MAX / (1+1+8))
            end = start + PY_SSIZE_T_MAX / (1+1+8);
        startp = PyUnicode_AS_UNICODE(object);
        /* First pass: compute the length of the replacement. */
        for (p = startp+start, ressize = 0; p < startp+end; ++p) {
#ifdef Py_UNICODE_WIDE
            if (*p >= 0x00010000)
                ressize += 1+1+8;
            else
#endif
            if (*p >= 0x100) {
                ressize += 1+1+4;
            }
            else
                ressize += 1+1+2;
        }
        res = PyUnicode_FromUnicode(NULL, ressize);
        if (res == NULL) {
            Py_DECREF(object);
            return NULL;
        }
        /* Second pass: write the escapes. Each branch emits the type
           letter and the leading hex digits only; the two lowest
           digits are written by the shared statements at the end of
           the loop body. */
        for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
            p < startp+end; ++p) {
            Py_UNICODE c = *p;
            *outp++ = '\\';
#ifdef Py_UNICODE_WIDE
            if (c >= 0x00010000) {
                *outp++ = 'U';
                *outp++ = hexdigits[(c>>28)&0xf];
                *outp++ = hexdigits[(c>>24)&0xf];
                *outp++ = hexdigits[(c>>20)&0xf];
                *outp++ = hexdigits[(c>>16)&0xf];
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
            else
#endif
            if (c >= 0x100) {
                *outp++ = 'u';
                *outp++ = hexdigits[(c>>12)&0xf];
                *outp++ = hexdigits[(c>>8)&0xf];
            }
            else
                *outp++ = 'x';
            *outp++ = hexdigits[(c>>4)&0xf];
            *outp++ = hexdigits[c&0xf];
        }

        restuple = Py_BuildValue("(On)", res, end);
        Py_DECREF(res);
        Py_DECREF(object);
        return restuple;
    }
    else {
        wrong_exception_type(exc);
        return NULL;
    }
}
749#endif
750
/* METH_O entry point for the "strict" error handler; self is unused
   and exc is forwarded to PyCodec_StrictErrors(). */
static PyObject *strict_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_StrictErrors(exc);
}
755
756
757#ifdef Py_USING_UNICODE
/* METH_O entry point for the "ignore" error handler; self is unused
   and exc is forwarded to PyCodec_IgnoreErrors(). */
static PyObject *ignore_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_IgnoreErrors(exc);
}
762
763
/* METH_O entry point for the "replace" error handler; self is unused
   and exc is forwarded to PyCodec_ReplaceErrors(). */
static PyObject *replace_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_ReplaceErrors(exc);
}
768
769
/* METH_O entry point for the "xmlcharrefreplace" error handler; self
   is unused and exc is forwarded to
   PyCodec_XMLCharRefReplaceErrors(). */
static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_XMLCharRefReplaceErrors(exc);
}
774
775
/* METH_O entry point for the "backslashreplace" error handler; self
   is unused and exc is forwarded to
   PyCodec_BackslashReplaceErrors(). */
static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
{
    return PyCodec_BackslashReplaceErrors(exc);
}
780#endif
781
782static int _PyCodecRegistry_Init(void)
783{
784    static struct {
785        char *name;
786        PyMethodDef def;
787    } methods[] =
788    {
789        {
790            "strict",
791            {
792                "strict_errors",
793                strict_errors,
794                METH_O,
795                PyDoc_STR("Implements the 'strict' error handling, which "
796                          "raises a UnicodeError on coding errors.")
797            }
798        },
799#ifdef Py_USING_UNICODE
800        {
801            "ignore",
802            {
803                "ignore_errors",
804                ignore_errors,
805                METH_O,
806                PyDoc_STR("Implements the 'ignore' error handling, which "
807                          "ignores malformed data and continues.")
808            }
809        },
810        {
811            "replace",
812            {
813                "replace_errors",
814                replace_errors,
815                METH_O,
816                PyDoc_STR("Implements the 'replace' error handling, which "
817                          "replaces malformed data with a replacement marker.")
818            }
819        },
820        {
821            "xmlcharrefreplace",
822            {
823                "xmlcharrefreplace_errors",
824                xmlcharrefreplace_errors,
825                METH_O,
826                PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
827                          "which replaces an unencodable character with the "
828                          "appropriate XML character reference.")
829            }
830        },
831        {
832            "backslashreplace",
833            {
834                "backslashreplace_errors",
835                backslashreplace_errors,
836                METH_O,
837                PyDoc_STR("Implements the 'backslashreplace' error handling, "
838                          "which replaces an unencodable character with a "
839                          "backslashed escape sequence.")
840            }
841        }
842#endif
843    };
844
845    PyInterpreterState *interp = PyThreadState_GET()->interp;
846    PyObject *mod;
847    unsigned i;
848
849    if (interp->codec_search_path != NULL)
850        return 0;
851
852    interp->codec_search_path = PyList_New(0);
853    interp->codec_search_cache = PyDict_New();
854    interp->codec_error_registry = PyDict_New();
855
856    if (interp->codec_error_registry) {
857        for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
858            PyObject *func = PyCFunction_New(&methods[i].def, NULL);
859            int res;
860            if (!func)
861                Py_FatalError("can't initialize codec error registry");
862            res = PyCodec_RegisterError(methods[i].name, func);
863            Py_DECREF(func);
864            if (res)
865                Py_FatalError("can't initialize codec error registry");
866        }
867    }
868
869    if (interp->codec_search_path == NULL ||
870        interp->codec_search_cache == NULL ||
871        interp->codec_error_registry == NULL)
872        Py_FatalError("can't initialize codec registry");
873
874    mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
875    if (mod == NULL) {
876        if (PyErr_ExceptionMatches(PyExc_ImportError)) {
877            /* Ignore ImportErrors... this is done so that
878               distributions can disable the encodings package. Note
879               that other errors are not masked, e.g. SystemErrors
880               raised to inform the user of an error in the Python
881               configuration are still reported back to the user. */
882            PyErr_Clear();
883            return 0;
884        }
885        return -1;
886    }
887    Py_DECREF(mod);
888    return 0;
889}
890