1/* ------------------------------------------------------------------------
2
3   _codecs -- Provides access to the codec registry and the builtin
4              codecs.
5
6   This module should never be imported directly. The standard library
7   module "codecs" wraps this builtin module for use within Python.
8
9   The codec registry is accessible via:
10
11     register(search_function) -> None
12
13     lookup(encoding) -> CodecInfo object
14
15   The builtin Unicode codecs use the following interface:
16
17     <encoding>_encode(Unicode_object[,errors='strict']) ->
18        (string object, bytes consumed)
19
20     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21        (Unicode object, bytes consumed)
22
   <encoding>_encode() interfaces also accept non-Unicode objects as
   input. Such objects are first converted to Unicode using
   PyUnicode_FromObject() before the conversion is applied.
26
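   These <encoding>s are available: utf_7, utf_8, utf_16 and utf_32
   (each of the latter two also as _le and _be variants, plus an _ex
   decoder), unicode_escape, raw_unicode_escape, unicode_internal,
   latin_1, ascii (7-bit), charmap, mbcs (on win32).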
30
31
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
34Copyright (c) Corporation for National Research Initiatives.
35
36   ------------------------------------------------------------------------ */
37
38#define PY_SSIZE_T_CLEAN
39#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
43PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
48a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
50
51static
52PyObject *codec_register(PyObject *self, PyObject *search_function)
53{
54    if (PyCodec_Register(search_function))
55        return NULL;
56
57    Py_RETURN_NONE;
58}
59
60PyDoc_STRVAR(lookup__doc__,
61"lookup(encoding) -> CodecInfo\n\
62\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
64a CodecInfo object.");
65
66static
67PyObject *codec_lookup(PyObject *self, PyObject *args)
68{
69    char *encoding;
70
71    if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
72        return NULL;
73
74    return _PyCodec_Lookup(encoding);
75}
76
77PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
90    const char *encoding = NULL;
91    const char *errors = NULL;
92    PyObject *v;
93
94    if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95        return NULL;
96
97#ifdef Py_USING_UNICODE
98    if (encoding == NULL)
99        encoding = PyUnicode_GetDefaultEncoding();
100#else
101    if (encoding == NULL) {
102        PyErr_SetString(PyExc_ValueError, "no encoding specified");
103        return NULL;
104    }
105#endif
106
107    /* Encode via the codec registry */
108    return PyCodec_Encode(v, encoding, errors);
109}
110
111PyDoc_STRVAR(decode__doc__,
112"decode(obj, [encoding[,errors]]) -> object\n\
113\n\
114Decodes obj using the codec registered for encoding. encoding defaults\n\
115to the default encoding. errors may be given to set a different error\n\
116handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117a ValueError. Other possible values are 'ignore' and 'replace'\n\
118as well as any other name registered with codecs.register_error that is\n\
119able to handle ValueErrors.");
120
121static PyObject *
122codec_decode(PyObject *self, PyObject *args)
123{
124    const char *encoding = NULL;
125    const char *errors = NULL;
126    PyObject *v;
127
128    if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129        return NULL;
130
131#ifdef Py_USING_UNICODE
132    if (encoding == NULL)
133        encoding = PyUnicode_GetDefaultEncoding();
134#else
135    if (encoding == NULL) {
136        PyErr_SetString(PyExc_ValueError, "no encoding specified");
137        return NULL;
138    }
139#endif
140
141    /* Decode via the codec registry */
142    return PyCodec_Decode(v, encoding, errors);
143}
144
145/* --- Helpers ------------------------------------------------------------ */
146
147static
148PyObject *codec_tuple(PyObject *unicode,
149                      Py_ssize_t len)
150{
151    PyObject *v;
152    if (unicode == NULL)
153        return NULL;
154    v = Py_BuildValue("On", unicode, len);
155    Py_DECREF(unicode);
156    return v;
157}
158
159/* --- String codecs ------------------------------------------------------ */
160static PyObject *
161escape_decode(PyObject *self,
162              PyObject *args)
163{
164    const char *errors = NULL;
165    const char *data;
166    Py_ssize_t size;
167
168    if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169                          &data, &size, &errors))
170        return NULL;
171    return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
172                       size);
173}
174
175static PyObject *
176escape_encode(PyObject *self,
177              PyObject *args)
178{
179    PyObject *str;
180    const char *errors = NULL;
181    char *buf;
182    Py_ssize_t consumed, len;
183
184    if (!PyArg_ParseTuple(args, "S|z:escape_encode",
185                          &str, &errors))
186        return NULL;
187
188    consumed = PyString_GET_SIZE(str);
189    str = PyString_Repr(str, 0);
190    if (!str)
191        return NULL;
192
193    /* The string will be quoted. Unquote, similar to unicode-escape. */
194    buf = PyString_AS_STRING (str);
195    len = PyString_GET_SIZE (str);
196    memmove(buf, buf+1, len-2);
197    if (_PyString_Resize(&str, len-2) < 0)
198        return NULL;
199
200    return codec_tuple(str, consumed);
201}
202
203#ifdef Py_USING_UNICODE
204/* --- Decoder ------------------------------------------------------------ */
205
206static PyObject *
207unicode_internal_decode(PyObject *self,
208                        PyObject *args)
209{
210    PyObject *obj;
211    const char *errors = NULL;
212    const char *data;
213    Py_ssize_t size;
214
215    if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
216                          &obj, &errors))
217        return NULL;
218
219    if (PyUnicode_Check(obj)) {
220        Py_INCREF(obj);
221        return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
222    }
223    else {
224        if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
225            return NULL;
226
227        return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
228                           size);
229    }
230}
231
232static PyObject *
233utf_7_decode(PyObject *self,
234             PyObject *args)
235{
236    Py_buffer pbuf;
237    const char *errors = NULL;
238    int final = 0;
239    Py_ssize_t consumed;
240    PyObject *decoded = NULL;
241
242    if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
243                          &pbuf, &errors, &final))
244        return NULL;
245    consumed = pbuf.len;
246
247    decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
248                                           final ? NULL : &consumed);
249    PyBuffer_Release(&pbuf);
250    if (decoded == NULL)
251        return NULL;
252    return codec_tuple(decoded, consumed);
253}
254
255static PyObject *
256utf_8_decode(PyObject *self,
257            PyObject *args)
258{
259    Py_buffer pbuf;
260    const char *errors = NULL;
261    int final = 0;
262    Py_ssize_t consumed;
263    PyObject *decoded = NULL;
264
265    if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
266                          &pbuf, &errors, &final))
267        return NULL;
268    consumed = pbuf.len;
269
270    decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
271                                           final ? NULL : &consumed);
272    PyBuffer_Release(&pbuf);
273    if (decoded == NULL)
274        return NULL;
275    return codec_tuple(decoded, consumed);
276}
277
278static PyObject *
279utf_16_decode(PyObject *self,
280            PyObject *args)
281{
282    Py_buffer pbuf;
283    const char *errors = NULL;
284    int byteorder = 0;
285    int final = 0;
286    Py_ssize_t consumed;
287    PyObject *decoded;
288
289    if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
290                          &pbuf, &errors, &final))
291        return NULL;
292    consumed = pbuf.len; /* This is overwritten unless final is true. */
293    decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
294                                        &byteorder, final ? NULL : &consumed);
295    PyBuffer_Release(&pbuf);
296    if (decoded == NULL)
297        return NULL;
298    return codec_tuple(decoded, consumed);
299}
300
301static PyObject *
302utf_16_le_decode(PyObject *self,
303                 PyObject *args)
304{
305    Py_buffer pbuf;
306    const char *errors = NULL;
307    int byteorder = -1;
308    int final = 0;
309    Py_ssize_t consumed;
310    PyObject *decoded = NULL;
311
312    if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
313                          &pbuf, &errors, &final))
314        return NULL;
315
316    consumed = pbuf.len; /* This is overwritten unless final is true. */
317    decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
318        &byteorder, final ? NULL : &consumed);
319    PyBuffer_Release(&pbuf);
320    if (decoded == NULL)
321        return NULL;
322    return codec_tuple(decoded, consumed);
323}
324
325static PyObject *
326utf_16_be_decode(PyObject *self,
327                 PyObject *args)
328{
329    Py_buffer pbuf;
330    const char *errors = NULL;
331    int byteorder = 1;
332    int final = 0;
333    Py_ssize_t consumed;
334    PyObject *decoded = NULL;
335
336    if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
337                          &pbuf, &errors, &final))
338        return NULL;
339
340    consumed = pbuf.len; /* This is overwritten unless final is true. */
341    decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
342        &byteorder, final ? NULL : &consumed);
343    PyBuffer_Release(&pbuf);
344    if (decoded == NULL)
345        return NULL;
346    return codec_tuple(decoded, consumed);
347}
348
349/* This non-standard version also provides access to the byteorder
350   parameter of the builtin UTF-16 codec.
351
352   It returns a tuple (unicode, bytesread, byteorder) with byteorder
353   being the value in effect at the end of data.
354
355*/
356
357static PyObject *
358utf_16_ex_decode(PyObject *self,
359                 PyObject *args)
360{
361    Py_buffer pbuf;
362    const char *errors = NULL;
363    int byteorder = 0;
364    PyObject *unicode, *tuple;
365    int final = 0;
366    Py_ssize_t consumed;
367
368    if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
369                          &pbuf, &errors, &byteorder, &final))
370        return NULL;
371    consumed = pbuf.len; /* This is overwritten unless final is true. */
372    unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
373                                        &byteorder, final ? NULL : &consumed);
374    PyBuffer_Release(&pbuf);
375    if (unicode == NULL)
376        return NULL;
377    tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
378    Py_DECREF(unicode);
379    return tuple;
380}
381
382static PyObject *
383utf_32_decode(PyObject *self,
384            PyObject *args)
385{
386    Py_buffer pbuf;
387    const char *errors = NULL;
388    int byteorder = 0;
389    int final = 0;
390    Py_ssize_t consumed;
391    PyObject *decoded;
392
393    if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
394                          &pbuf, &errors, &final))
395        return NULL;
396    consumed = pbuf.len; /* This is overwritten unless final is true. */
397    decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
398                                        &byteorder, final ? NULL : &consumed);
399    PyBuffer_Release(&pbuf);
400    if (decoded == NULL)
401        return NULL;
402    return codec_tuple(decoded, consumed);
403}
404
405static PyObject *
406utf_32_le_decode(PyObject *self,
407                 PyObject *args)
408{
409    Py_buffer pbuf;
410    const char *errors = NULL;
411    int byteorder = -1;
412    int final = 0;
413    Py_ssize_t consumed;
414    PyObject *decoded;
415
416    if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
417                          &pbuf, &errors, &final))
418        return NULL;
419    consumed = pbuf.len; /* This is overwritten unless final is true. */
420    decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
421                                        &byteorder, final ? NULL : &consumed);
422    PyBuffer_Release(&pbuf);
423    if (decoded == NULL)
424        return NULL;
425    return codec_tuple(decoded, consumed);
426}
427
428static PyObject *
429utf_32_be_decode(PyObject *self,
430                 PyObject *args)
431{
432    Py_buffer pbuf;
433    const char *errors = NULL;
434    int byteorder = 1;
435    int final = 0;
436    Py_ssize_t consumed;
437    PyObject *decoded;
438
439    if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
440                          &pbuf, &errors, &final))
441        return NULL;
442    consumed = pbuf.len; /* This is overwritten unless final is true. */
443    decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
444                                        &byteorder, final ? NULL : &consumed);
445    PyBuffer_Release(&pbuf);
446    if (decoded == NULL)
447        return NULL;
448    return codec_tuple(decoded, consumed);
449}
450
451/* This non-standard version also provides access to the byteorder
452   parameter of the builtin UTF-32 codec.
453
454   It returns a tuple (unicode, bytesread, byteorder) with byteorder
455   being the value in effect at the end of data.
456
457*/
458
459static PyObject *
460utf_32_ex_decode(PyObject *self,
461                 PyObject *args)
462{
463    Py_buffer pbuf;
464    const char *errors = NULL;
465    int byteorder = 0;
466    PyObject *unicode, *tuple;
467    int final = 0;
468    Py_ssize_t consumed;
469
470    if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
471                          &pbuf, &errors, &byteorder, &final))
472        return NULL;
473    consumed = pbuf.len; /* This is overwritten unless final is true. */
474    unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
475                                        &byteorder, final ? NULL : &consumed);
476    PyBuffer_Release(&pbuf);
477    if (unicode == NULL)
478        return NULL;
479    tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
480    Py_DECREF(unicode);
481    return tuple;
482}
483
484static PyObject *
485unicode_escape_decode(PyObject *self,
486                     PyObject *args)
487{
488    Py_buffer pbuf;
489    const char *errors = NULL;
490        PyObject *unicode;
491
492    if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
493                          &pbuf, &errors))
494        return NULL;
495
496    unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
497    PyBuffer_Release(&pbuf);
498    return codec_tuple(unicode, pbuf.len);
499}
500
501static PyObject *
502raw_unicode_escape_decode(PyObject *self,
503                        PyObject *args)
504{
505    Py_buffer pbuf;
506    const char *errors = NULL;
507    PyObject *unicode;
508
509    if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
510                          &pbuf, &errors))
511        return NULL;
512
513    unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
514    PyBuffer_Release(&pbuf);
515    return codec_tuple(unicode, pbuf.len);
516}
517
518static PyObject *
519latin_1_decode(PyObject *self,
520               PyObject *args)
521{
522    Py_buffer pbuf;
523    PyObject *unicode;
524    const char *errors = NULL;
525
526    if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
527                          &pbuf, &errors))
528        return NULL;
529
530    unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
531    PyBuffer_Release(&pbuf);
532    return codec_tuple(unicode, pbuf.len);
533}
534
535static PyObject *
536ascii_decode(PyObject *self,
537             PyObject *args)
538{
539    Py_buffer pbuf;
540    PyObject *unicode;
541    const char *errors = NULL;
542
543    if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
544                          &pbuf, &errors))
545        return NULL;
546
547    unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
548    PyBuffer_Release(&pbuf);
549    return codec_tuple(unicode, pbuf.len);
550}
551
552static PyObject *
553charmap_decode(PyObject *self,
554               PyObject *args)
555{
556    Py_buffer pbuf;
557    PyObject *unicode;
558    const char *errors = NULL;
559    PyObject *mapping = NULL;
560
561    if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
562                          &pbuf, &errors, &mapping))
563        return NULL;
564    if (mapping == Py_None)
565        mapping = NULL;
566
567    unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
568    PyBuffer_Release(&pbuf);
569    return codec_tuple(unicode, pbuf.len);
570}
571
572#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
573
574static PyObject *
575mbcs_decode(PyObject *self,
576            PyObject *args)
577{
578    Py_buffer pbuf;
579    const char *errors = NULL;
580    int final = 0;
581    Py_ssize_t consumed;
582    PyObject *decoded = NULL;
583
584    if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
585                          &pbuf, &errors, &final))
586        return NULL;
587    consumed = pbuf.len;
588
589    decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
590                                           final ? NULL : &consumed);
591    PyBuffer_Release(&pbuf);
592    if (decoded == NULL)
593        return NULL;
594    return codec_tuple(decoded, consumed);
595}
596
597#endif /* MS_WINDOWS */
598
599/* --- Encoder ------------------------------------------------------------ */
600
601static PyObject *
602readbuffer_encode(PyObject *self,
603                  PyObject *args)
604{
605    const char *data;
606    Py_ssize_t size;
607    const char *errors = NULL;
608
609    if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
610                          &data, &size, &errors))
611        return NULL;
612
613    return codec_tuple(PyString_FromStringAndSize(data, size),
614                       size);
615}
616
617static PyObject *
618charbuffer_encode(PyObject *self,
619                  PyObject *args)
620{
621    const char *data;
622    Py_ssize_t size;
623    const char *errors = NULL;
624
625    if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
626                          &data, &size, &errors))
627        return NULL;
628
629    return codec_tuple(PyString_FromStringAndSize(data, size),
630                       size);
631}
632
633static PyObject *
634unicode_internal_encode(PyObject *self,
635                        PyObject *args)
636{
637    PyObject *obj;
638    const char *errors = NULL;
639    const char *data;
640    Py_ssize_t size;
641
642    if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
643                          &obj, &errors))
644        return NULL;
645
646    if (PyUnicode_Check(obj)) {
647        data = PyUnicode_AS_DATA(obj);
648        size = PyUnicode_GET_DATA_SIZE(obj);
649        return codec_tuple(PyString_FromStringAndSize(data, size),
650                           PyUnicode_GET_SIZE(obj));
651    }
652    else {
653        if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
654            return NULL;
655        return codec_tuple(PyString_FromStringAndSize(data, size),
656                           size);
657    }
658}
659
660static PyObject *
661utf_7_encode(PyObject *self,
662            PyObject *args)
663{
664    PyObject *str, *v;
665    const char *errors = NULL;
666
667    if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
668                          &str, &errors))
669        return NULL;
670
671    str = PyUnicode_FromObject(str);
672    if (str == NULL)
673        return NULL;
674    v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
675                                         PyUnicode_GET_SIZE(str),
676                                         0,
677                                         0,
678                                         errors),
679                    PyUnicode_GET_SIZE(str));
680    Py_DECREF(str);
681    return v;
682}
683
684static PyObject *
685utf_8_encode(PyObject *self,
686            PyObject *args)
687{
688    PyObject *str, *v;
689    const char *errors = NULL;
690
691    if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
692                          &str, &errors))
693        return NULL;
694
695    str = PyUnicode_FromObject(str);
696    if (str == NULL)
697        return NULL;
698    v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
699                                         PyUnicode_GET_SIZE(str),
700                                         errors),
701                    PyUnicode_GET_SIZE(str));
702    Py_DECREF(str);
703    return v;
704}
705
706/* This version provides access to the byteorder parameter of the
707   builtin UTF-16 codecs as optional third argument. It defaults to 0
708   which means: use the native byte order and prepend the data with a
709   BOM mark.
710
711*/
712
713static PyObject *
714utf_16_encode(PyObject *self,
715            PyObject *args)
716{
717    PyObject *str, *v;
718    const char *errors = NULL;
719    int byteorder = 0;
720
721    if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
722                          &str, &errors, &byteorder))
723        return NULL;
724
725    str = PyUnicode_FromObject(str);
726    if (str == NULL)
727        return NULL;
728    v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
729                                          PyUnicode_GET_SIZE(str),
730                                          errors,
731                                          byteorder),
732                    PyUnicode_GET_SIZE(str));
733    Py_DECREF(str);
734    return v;
735}
736
737static PyObject *
738utf_16_le_encode(PyObject *self,
739                 PyObject *args)
740{
741    PyObject *str, *v;
742    const char *errors = NULL;
743
744    if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
745                          &str, &errors))
746        return NULL;
747
748    str = PyUnicode_FromObject(str);
749    if (str == NULL)
750        return NULL;
751    v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
752                                             PyUnicode_GET_SIZE(str),
753                                             errors,
754                                             -1),
755                       PyUnicode_GET_SIZE(str));
756    Py_DECREF(str);
757    return v;
758}
759
760static PyObject *
761utf_16_be_encode(PyObject *self,
762                 PyObject *args)
763{
764    PyObject *str, *v;
765    const char *errors = NULL;
766
767    if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
768                          &str, &errors))
769        return NULL;
770
771    str = PyUnicode_FromObject(str);
772    if (str == NULL)
773        return NULL;
774    v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
775                                          PyUnicode_GET_SIZE(str),
776                                          errors,
777                                          +1),
778                    PyUnicode_GET_SIZE(str));
779    Py_DECREF(str);
780    return v;
781}
782
783/* This version provides access to the byteorder parameter of the
784   builtin UTF-32 codecs as optional third argument. It defaults to 0
785   which means: use the native byte order and prepend the data with a
786   BOM mark.
787
788*/
789
790static PyObject *
791utf_32_encode(PyObject *self,
792            PyObject *args)
793{
794    PyObject *str, *v;
795    const char *errors = NULL;
796    int byteorder = 0;
797
798    if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
799                          &str, &errors, &byteorder))
800        return NULL;
801
802    str = PyUnicode_FromObject(str);
803    if (str == NULL)
804        return NULL;
805    v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
806                                          PyUnicode_GET_SIZE(str),
807                                          errors,
808                                          byteorder),
809                    PyUnicode_GET_SIZE(str));
810    Py_DECREF(str);
811    return v;
812}
813
814static PyObject *
815utf_32_le_encode(PyObject *self,
816                 PyObject *args)
817{
818    PyObject *str, *v;
819    const char *errors = NULL;
820
821    if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
822                          &str, &errors))
823        return NULL;
824
825    str = PyUnicode_FromObject(str);
826    if (str == NULL)
827        return NULL;
828    v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
829                                             PyUnicode_GET_SIZE(str),
830                                             errors,
831                                             -1),
832                       PyUnicode_GET_SIZE(str));
833    Py_DECREF(str);
834    return v;
835}
836
837static PyObject *
838utf_32_be_encode(PyObject *self,
839                 PyObject *args)
840{
841    PyObject *str, *v;
842    const char *errors = NULL;
843
844    if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
845                          &str, &errors))
846        return NULL;
847
848    str = PyUnicode_FromObject(str);
849    if (str == NULL)
850        return NULL;
851    v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
852                                          PyUnicode_GET_SIZE(str),
853                                          errors,
854                                          +1),
855                    PyUnicode_GET_SIZE(str));
856    Py_DECREF(str);
857    return v;
858}
859
860static PyObject *
861unicode_escape_encode(PyObject *self,
862                     PyObject *args)
863{
864    PyObject *str, *v;
865    const char *errors = NULL;
866
867    if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
868                          &str, &errors))
869        return NULL;
870
871    str = PyUnicode_FromObject(str);
872    if (str == NULL)
873        return NULL;
874    v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
875                                                  PyUnicode_GET_SIZE(str)),
876                    PyUnicode_GET_SIZE(str));
877    Py_DECREF(str);
878    return v;
879}
880
881static PyObject *
882raw_unicode_escape_encode(PyObject *self,
883                        PyObject *args)
884{
885    PyObject *str, *v;
886    const char *errors = NULL;
887
888    if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
889                          &str, &errors))
890        return NULL;
891
892    str = PyUnicode_FromObject(str);
893    if (str == NULL)
894        return NULL;
895    v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
896                               PyUnicode_AS_UNICODE(str),
897                               PyUnicode_GET_SIZE(str)),
898                    PyUnicode_GET_SIZE(str));
899    Py_DECREF(str);
900    return v;
901}
902
903static PyObject *
904latin_1_encode(PyObject *self,
905               PyObject *args)
906{
907    PyObject *str, *v;
908    const char *errors = NULL;
909
910    if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
911                          &str, &errors))
912        return NULL;
913
914    str = PyUnicode_FromObject(str);
915    if (str == NULL)
916        return NULL;
917    v = codec_tuple(PyUnicode_EncodeLatin1(
918                               PyUnicode_AS_UNICODE(str),
919                               PyUnicode_GET_SIZE(str),
920                               errors),
921                    PyUnicode_GET_SIZE(str));
922    Py_DECREF(str);
923    return v;
924}
925
926static PyObject *
927ascii_encode(PyObject *self,
928             PyObject *args)
929{
930    PyObject *str, *v;
931    const char *errors = NULL;
932
933    if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
934                          &str, &errors))
935        return NULL;
936
937    str = PyUnicode_FromObject(str);
938    if (str == NULL)
939        return NULL;
940    v = codec_tuple(PyUnicode_EncodeASCII(
941                               PyUnicode_AS_UNICODE(str),
942                               PyUnicode_GET_SIZE(str),
943                               errors),
944                    PyUnicode_GET_SIZE(str));
945    Py_DECREF(str);
946    return v;
947}
948
949static PyObject *
950charmap_encode(PyObject *self,
951             PyObject *args)
952{
953    PyObject *str, *v;
954    const char *errors = NULL;
955    PyObject *mapping = NULL;
956
957    if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
958                          &str, &errors, &mapping))
959        return NULL;
960    if (mapping == Py_None)
961        mapping = NULL;
962
963    str = PyUnicode_FromObject(str);
964    if (str == NULL)
965        return NULL;
966    v = codec_tuple(PyUnicode_EncodeCharmap(
967                               PyUnicode_AS_UNICODE(str),
968                               PyUnicode_GET_SIZE(str),
969                               mapping,
970                               errors),
971                    PyUnicode_GET_SIZE(str));
972    Py_DECREF(str);
973    return v;
974}
975
976static PyObject*
977charmap_build(PyObject *self, PyObject *args)
978{
979    PyObject *map;
980    if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
981        return NULL;
982    return PyUnicode_BuildEncodingMap(map);
983}
984
985#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
986
987static PyObject *
988mbcs_encode(PyObject *self,
989            PyObject *args)
990{
991    PyObject *str, *v;
992    const char *errors = NULL;
993
994    if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
995                          &str, &errors))
996        return NULL;
997
998    str = PyUnicode_FromObject(str);
999    if (str == NULL)
1000        return NULL;
1001    v = codec_tuple(PyUnicode_EncodeMBCS(
1002                               PyUnicode_AS_UNICODE(str),
1003                               PyUnicode_GET_SIZE(str),
1004                               errors),
1005                    PyUnicode_GET_SIZE(str));
1006    Py_DECREF(str);
1007    return v;
1008}
1009
1010#endif /* MS_WINDOWS */
1011#endif /* Py_USING_UNICODE */
1012
1013/* --- Error handler registry --------------------------------------------- */
1014
1015PyDoc_STRVAR(register_error__doc__,
1016"register_error(errors, handler)\n\
1017\n\
1018Register the specified error handler under the name\n\
1019errors. handler must be a callable object, that\n\
1020will be called with an exception instance containing\n\
1021information about the location of the encoding/decoding\n\
1022error and must return a (replacement, new position) tuple.");
1023
1024static PyObject *register_error(PyObject *self, PyObject *args)
1025{
1026    const char *name;
1027    PyObject *handler;
1028
1029    if (!PyArg_ParseTuple(args, "sO:register_error",
1030                          &name, &handler))
1031        return NULL;
1032    if (PyCodec_RegisterError(name, handler))
1033        return NULL;
1034    Py_RETURN_NONE;
1035}
1036
1037PyDoc_STRVAR(lookup_error__doc__,
1038"lookup_error(errors) -> handler\n\
1039\n\
1040Return the error handler for the specified error handling name\n\
1041or raise a LookupError, if no handler exists under this name.");
1042
1043static PyObject *lookup_error(PyObject *self, PyObject *args)
1044{
1045    const char *name;
1046
1047    if (!PyArg_ParseTuple(args, "s:lookup_error",
1048                          &name))
1049        return NULL;
1050    return PyCodec_LookupError(name);
1051}
1052
1053/* --- Module API --------------------------------------------------------- */
1054
1055static PyMethodDef _codecs_functions[] = {
1056    {"register",                codec_register,                 METH_O,
1057        register__doc__},
1058    {"lookup",                  codec_lookup,                   METH_VARARGS,
1059        lookup__doc__},
1060    {"encode",                  codec_encode,                   METH_VARARGS,
1061        encode__doc__},
1062    {"decode",                  codec_decode,                   METH_VARARGS,
1063        decode__doc__},
1064    {"escape_encode",           escape_encode,                  METH_VARARGS},
1065    {"escape_decode",           escape_decode,                  METH_VARARGS},
1066#ifdef Py_USING_UNICODE
1067    {"utf_8_encode",            utf_8_encode,                   METH_VARARGS},
1068    {"utf_8_decode",            utf_8_decode,                   METH_VARARGS},
1069    {"utf_7_encode",            utf_7_encode,                   METH_VARARGS},
1070    {"utf_7_decode",            utf_7_decode,                   METH_VARARGS},
1071    {"utf_16_encode",           utf_16_encode,                  METH_VARARGS},
1072    {"utf_16_le_encode",        utf_16_le_encode,               METH_VARARGS},
1073    {"utf_16_be_encode",        utf_16_be_encode,               METH_VARARGS},
1074    {"utf_16_decode",           utf_16_decode,                  METH_VARARGS},
1075    {"utf_16_le_decode",        utf_16_le_decode,               METH_VARARGS},
1076    {"utf_16_be_decode",        utf_16_be_decode,               METH_VARARGS},
1077    {"utf_16_ex_decode",        utf_16_ex_decode,               METH_VARARGS},
1078    {"utf_32_encode",           utf_32_encode,                  METH_VARARGS},
1079    {"utf_32_le_encode",        utf_32_le_encode,               METH_VARARGS},
1080    {"utf_32_be_encode",        utf_32_be_encode,               METH_VARARGS},
1081    {"utf_32_decode",           utf_32_decode,                  METH_VARARGS},
1082    {"utf_32_le_decode",        utf_32_le_decode,               METH_VARARGS},
1083    {"utf_32_be_decode",        utf_32_be_decode,               METH_VARARGS},
1084    {"utf_32_ex_decode",        utf_32_ex_decode,               METH_VARARGS},
1085    {"unicode_escape_encode",   unicode_escape_encode,          METH_VARARGS},
1086    {"unicode_escape_decode",   unicode_escape_decode,          METH_VARARGS},
1087    {"unicode_internal_encode", unicode_internal_encode,        METH_VARARGS},
1088    {"unicode_internal_decode", unicode_internal_decode,        METH_VARARGS},
1089    {"raw_unicode_escape_encode", raw_unicode_escape_encode,    METH_VARARGS},
1090    {"raw_unicode_escape_decode", raw_unicode_escape_decode,    METH_VARARGS},
1091    {"latin_1_encode",          latin_1_encode,                 METH_VARARGS},
1092    {"latin_1_decode",          latin_1_decode,                 METH_VARARGS},
1093    {"ascii_encode",            ascii_encode,                   METH_VARARGS},
1094    {"ascii_decode",            ascii_decode,                   METH_VARARGS},
1095    {"charmap_encode",          charmap_encode,                 METH_VARARGS},
1096    {"charmap_decode",          charmap_decode,                 METH_VARARGS},
1097    {"charmap_build",           charmap_build,                  METH_VARARGS},
1098    {"readbuffer_encode",       readbuffer_encode,              METH_VARARGS},
1099    {"charbuffer_encode",       charbuffer_encode,              METH_VARARGS},
1100#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1101    {"mbcs_encode",             mbcs_encode,                    METH_VARARGS},
1102    {"mbcs_decode",             mbcs_decode,                    METH_VARARGS},
1103#endif
1104#endif /* Py_USING_UNICODE */
1105    {"register_error",          register_error,                 METH_VARARGS,
1106        register_error__doc__},
1107    {"lookup_error",            lookup_error,                   METH_VARARGS,
1108        lookup_error__doc__},
1109    {NULL, NULL}                /* sentinel */
1110};
1111
1112PyMODINIT_FUNC
1113init_codecs(void)
1114{
1115    Py_InitModule("_codecs", _codecs_functions);
1116}
1117