1/* ------------------------------------------------------------------------ 2 3 _codecs -- Provides access to the codec registry and the builtin 4 codecs. 5 6 This module should never be imported directly. The standard library 7 module "codecs" wraps this builtin module for use within Python. 8 9 The codec registry is accessible via: 10 11 register(search_function) -> None 12 13 lookup(encoding) -> CodecInfo object 14 15 The builtin Unicode codecs use the following interface: 16 17 <encoding>_encode(Unicode_object[,errors='strict']) -> 18 (string object, bytes consumed) 19 20 <encoding>_decode(char_buffer_obj[,errors='strict']) -> 21 (Unicode object, bytes consumed) 22 23 <encoding>_encode() interfaces also accept non-Unicode object as 24 input. The objects are then converted to Unicode using 25 PyUnicode_FromObject() prior to applying the conversion. 26 27 These <encoding>s are available: utf_8, unicode_escape, 28 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit), 29 mbcs (on win32). 30 31 32Written by Marc-Andre Lemburg (mal@lemburg.com). 33 34Copyright (c) Corporation for National Research Initiatives. 35 36 ------------------------------------------------------------------------ */ 37 38#define PY_SSIZE_T_CLEAN 39#include "Python.h" 40 41/* --- Registry ----------------------------------------------------------- */ 42 43PyDoc_STRVAR(register__doc__, 44"register(search_function)\n\ 45\n\ 46Register a codec search function. Search functions are expected to take\n\ 47one argument, the encoding name in all lower case letters, and return\n\ 48a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\ 49(or a CodecInfo object)."); 50 51static 52PyObject *codec_register(PyObject *self, PyObject *search_function) 53{ 54 if (PyCodec_Register(search_function)) 55 return NULL; 56 57 Py_RETURN_NONE; 58} 59 60PyDoc_STRVAR(lookup__doc__, 61"lookup(encoding) -> CodecInfo\n\ 62\n\ 63Looks up a codec tuple in the Python codec registry and returns\n\ 64a CodecInfo object."); 65 66static 67PyObject *codec_lookup(PyObject *self, PyObject *args) 68{ 69 char *encoding; 70 71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding)) 72 return NULL; 73 74 return _PyCodec_Lookup(encoding); 75} 76 77PyDoc_STRVAR(encode__doc__, 78"encode(obj, [encoding[,errors]]) -> object\n\ 79\n\ 80Encodes obj using the codec registered for encoding. encoding defaults\n\ 81to the default encoding. errors may be given to set a different error\n\ 82handling scheme. Default is 'strict' meaning that encoding errors raise\n\ 83a ValueError. Other possible values are 'ignore', 'replace' and\n\ 84'xmlcharrefreplace' as well as any other name registered with\n\ 85codecs.register_error that can handle ValueErrors."); 86 87static PyObject * 88codec_encode(PyObject *self, PyObject *args) 89{ 90 const char *encoding = NULL; 91 const char *errors = NULL; 92 PyObject *v; 93 94 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors)) 95 return NULL; 96 97#ifdef Py_USING_UNICODE 98 if (encoding == NULL) 99 encoding = PyUnicode_GetDefaultEncoding(); 100#else 101 if (encoding == NULL) { 102 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 103 return NULL; 104 } 105#endif 106 107 /* Encode via the codec registry */ 108 return PyCodec_Encode(v, encoding, errors); 109} 110 111PyDoc_STRVAR(decode__doc__, 112"decode(obj, [encoding[,errors]]) -> object\n\ 113\n\ 114Decodes obj using the codec registered for encoding. encoding defaults\n\ 115to the default encoding. errors may be given to set a different error\n\ 116handling scheme. Default is 'strict' meaning that encoding errors raise\n\ 117a ValueError. Other possible values are 'ignore' and 'replace'\n\ 118as well as any other name registered with codecs.register_error that is\n\ 119able to handle ValueErrors."); 120 121static PyObject * 122codec_decode(PyObject *self, PyObject *args) 123{ 124 const char *encoding = NULL; 125 const char *errors = NULL; 126 PyObject *v; 127 128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors)) 129 return NULL; 130 131#ifdef Py_USING_UNICODE 132 if (encoding == NULL) 133 encoding = PyUnicode_GetDefaultEncoding(); 134#else 135 if (encoding == NULL) { 136 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 137 return NULL; 138 } 139#endif 140 141 /* Decode via the codec registry */ 142 return PyCodec_Decode(v, encoding, errors); 143} 144 145/* --- Helpers ------------------------------------------------------------ */ 146 147static 148PyObject *codec_tuple(PyObject *unicode, 149 Py_ssize_t len) 150{ 151 PyObject *v; 152 if (unicode == NULL) 153 return NULL; 154 v = Py_BuildValue("On", unicode, len); 155 Py_DECREF(unicode); 156 return v; 157} 158 159/* --- String codecs ------------------------------------------------------ */ 160static PyObject * 161escape_decode(PyObject *self, 162 PyObject *args) 163{ 164 const char *errors = NULL; 165 const char *data; 166 Py_ssize_t size; 167 168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode", 169 &data, &size, &errors)) 170 return NULL; 171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL), 172 size); 173} 174 175static PyObject * 176escape_encode(PyObject *self, 177 PyObject *args) 178{ 179 PyObject *str; 180 const char *errors = NULL; 181 char *buf; 182 Py_ssize_t consumed, len; 183 184 if (!PyArg_ParseTuple(args, "S|z:escape_encode", 185 &str, &errors)) 186 return NULL; 187 188 consumed = PyString_GET_SIZE(str); 189 str = PyString_Repr(str, 0); 190 if (!str) 191 return NULL; 192 193 /* The string will be quoted. Unquote, similar to unicode-escape. */ 194 buf = PyString_AS_STRING (str); 195 len = PyString_GET_SIZE (str); 196 memmove(buf, buf+1, len-2); 197 if (_PyString_Resize(&str, len-2) < 0) 198 return NULL; 199 200 return codec_tuple(str, consumed); 201} 202 203#ifdef Py_USING_UNICODE 204/* --- Decoder ------------------------------------------------------------ */ 205 206static PyObject * 207unicode_internal_decode(PyObject *self, 208 PyObject *args) 209{ 210 PyObject *obj; 211 const char *errors = NULL; 212 const char *data; 213 Py_ssize_t size; 214 215 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode", 216 &obj, &errors)) 217 return NULL; 218 219 if (PyUnicode_Check(obj)) { 220 Py_INCREF(obj); 221 return codec_tuple(obj, PyUnicode_GET_SIZE(obj)); 222 } 223 else { 224 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size)) 225 return NULL; 226 227 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors), 228 size); 229 } 230} 231 232static PyObject * 233utf_7_decode(PyObject *self, 234 PyObject *args) 235{ 236 Py_buffer pbuf; 237 const char *errors = NULL; 238 int final = 0; 239 Py_ssize_t consumed; 240 PyObject *decoded = NULL; 241 242 if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode", 243 &pbuf, &errors, &final)) 244 return NULL; 245 consumed = pbuf.len; 246 247 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors, 248 final ? NULL : &consumed); 249 PyBuffer_Release(&pbuf); 250 if (decoded == NULL) 251 return NULL; 252 return codec_tuple(decoded, consumed); 253} 254 255static PyObject * 256utf_8_decode(PyObject *self, 257 PyObject *args) 258{ 259 Py_buffer pbuf; 260 const char *errors = NULL; 261 int final = 0; 262 Py_ssize_t consumed; 263 PyObject *decoded = NULL; 264 265 if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode", 266 &pbuf, &errors, &final)) 267 return NULL; 268 consumed = pbuf.len; 269 270 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors, 271 final ? NULL : &consumed); 272 PyBuffer_Release(&pbuf); 273 if (decoded == NULL) 274 return NULL; 275 return codec_tuple(decoded, consumed); 276} 277 278static PyObject * 279utf_16_decode(PyObject *self, 280 PyObject *args) 281{ 282 Py_buffer pbuf; 283 const char *errors = NULL; 284 int byteorder = 0; 285 int final = 0; 286 Py_ssize_t consumed; 287 PyObject *decoded; 288 289 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode", 290 &pbuf, &errors, &final)) 291 return NULL; 292 consumed = pbuf.len; /* This is overwritten unless final is true. */ 293 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, 294 &byteorder, final ? NULL : &consumed); 295 PyBuffer_Release(&pbuf); 296 if (decoded == NULL) 297 return NULL; 298 return codec_tuple(decoded, consumed); 299} 300 301static PyObject * 302utf_16_le_decode(PyObject *self, 303 PyObject *args) 304{ 305 Py_buffer pbuf; 306 const char *errors = NULL; 307 int byteorder = -1; 308 int final = 0; 309 Py_ssize_t consumed; 310 PyObject *decoded = NULL; 311 312 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode", 313 &pbuf, &errors, &final)) 314 return NULL; 315 316 consumed = pbuf.len; /* This is overwritten unless final is true. */ 317 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, 318 &byteorder, final ? NULL : &consumed); 319 PyBuffer_Release(&pbuf); 320 if (decoded == NULL) 321 return NULL; 322 return codec_tuple(decoded, consumed); 323} 324 325static PyObject * 326utf_16_be_decode(PyObject *self, 327 PyObject *args) 328{ 329 Py_buffer pbuf; 330 const char *errors = NULL; 331 int byteorder = 1; 332 int final = 0; 333 Py_ssize_t consumed; 334 PyObject *decoded = NULL; 335 336 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode", 337 &pbuf, &errors, &final)) 338 return NULL; 339 340 consumed = pbuf.len; /* This is overwritten unless final is true. */ 341 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, 342 &byteorder, final ? NULL : &consumed); 343 PyBuffer_Release(&pbuf); 344 if (decoded == NULL) 345 return NULL; 346 return codec_tuple(decoded, consumed); 347} 348 349/* This non-standard version also provides access to the byteorder 350 parameter of the builtin UTF-16 codec. 351 352 It returns a tuple (unicode, bytesread, byteorder) with byteorder 353 being the value in effect at the end of data. 354 355*/ 356 357static PyObject * 358utf_16_ex_decode(PyObject *self, 359 PyObject *args) 360{ 361 Py_buffer pbuf; 362 const char *errors = NULL; 363 int byteorder = 0; 364 PyObject *unicode, *tuple; 365 int final = 0; 366 Py_ssize_t consumed; 367 368 if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode", 369 &pbuf, &errors, &byteorder, &final)) 370 return NULL; 371 consumed = pbuf.len; /* This is overwritten unless final is true. */ 372 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors, 373 &byteorder, final ? NULL : &consumed); 374 PyBuffer_Release(&pbuf); 375 if (unicode == NULL) 376 return NULL; 377 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder); 378 Py_DECREF(unicode); 379 return tuple; 380} 381 382static PyObject * 383utf_32_decode(PyObject *self, 384 PyObject *args) 385{ 386 Py_buffer pbuf; 387 const char *errors = NULL; 388 int byteorder = 0; 389 int final = 0; 390 Py_ssize_t consumed; 391 PyObject *decoded; 392 393 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode", 394 &pbuf, &errors, &final)) 395 return NULL; 396 consumed = pbuf.len; /* This is overwritten unless final is true. */ 397 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, 398 &byteorder, final ? NULL : &consumed); 399 PyBuffer_Release(&pbuf); 400 if (decoded == NULL) 401 return NULL; 402 return codec_tuple(decoded, consumed); 403} 404 405static PyObject * 406utf_32_le_decode(PyObject *self, 407 PyObject *args) 408{ 409 Py_buffer pbuf; 410 const char *errors = NULL; 411 int byteorder = -1; 412 int final = 0; 413 Py_ssize_t consumed; 414 PyObject *decoded; 415 416 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode", 417 &pbuf, &errors, &final)) 418 return NULL; 419 consumed = pbuf.len; /* This is overwritten unless final is true. */ 420 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, 421 &byteorder, final ? NULL : &consumed); 422 PyBuffer_Release(&pbuf); 423 if (decoded == NULL) 424 return NULL; 425 return codec_tuple(decoded, consumed); 426} 427 428static PyObject * 429utf_32_be_decode(PyObject *self, 430 PyObject *args) 431{ 432 Py_buffer pbuf; 433 const char *errors = NULL; 434 int byteorder = 1; 435 int final = 0; 436 Py_ssize_t consumed; 437 PyObject *decoded; 438 439 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode", 440 &pbuf, &errors, &final)) 441 return NULL; 442 consumed = pbuf.len; /* This is overwritten unless final is true. */ 443 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, 444 &byteorder, final ? NULL : &consumed); 445 PyBuffer_Release(&pbuf); 446 if (decoded == NULL) 447 return NULL; 448 return codec_tuple(decoded, consumed); 449} 450 451/* This non-standard version also provides access to the byteorder 452 parameter of the builtin UTF-32 codec. 453 454 It returns a tuple (unicode, bytesread, byteorder) with byteorder 455 being the value in effect at the end of data. 456 457*/ 458 459static PyObject * 460utf_32_ex_decode(PyObject *self, 461 PyObject *args) 462{ 463 Py_buffer pbuf; 464 const char *errors = NULL; 465 int byteorder = 0; 466 PyObject *unicode, *tuple; 467 int final = 0; 468 Py_ssize_t consumed; 469 470 if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode", 471 &pbuf, &errors, &byteorder, &final)) 472 return NULL; 473 consumed = pbuf.len; /* This is overwritten unless final is true. */ 474 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors, 475 &byteorder, final ? NULL : &consumed); 476 PyBuffer_Release(&pbuf); 477 if (unicode == NULL) 478 return NULL; 479 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder); 480 Py_DECREF(unicode); 481 return tuple; 482} 483 484static PyObject * 485unicode_escape_decode(PyObject *self, 486 PyObject *args) 487{ 488 Py_buffer pbuf; 489 const char *errors = NULL; 490 PyObject *unicode; 491 492 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode", 493 &pbuf, &errors)) 494 return NULL; 495 496 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors); 497 PyBuffer_Release(&pbuf); 498 return codec_tuple(unicode, pbuf.len); 499} 500 501static PyObject * 502raw_unicode_escape_decode(PyObject *self, 503 PyObject *args) 504{ 505 Py_buffer pbuf; 506 const char *errors = NULL; 507 PyObject *unicode; 508 509 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode", 510 &pbuf, &errors)) 511 return NULL; 512 513 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors); 514 PyBuffer_Release(&pbuf); 515 return codec_tuple(unicode, pbuf.len); 516} 517 518static PyObject * 519latin_1_decode(PyObject *self, 520 PyObject *args) 521{ 522 Py_buffer pbuf; 523 PyObject *unicode; 524 const char *errors = NULL; 525 526 if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode", 527 &pbuf, &errors)) 528 return NULL; 529 530 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors); 531 PyBuffer_Release(&pbuf); 532 return codec_tuple(unicode, pbuf.len); 533} 534 535static PyObject * 536ascii_decode(PyObject *self, 537 PyObject *args) 538{ 539 Py_buffer pbuf; 540 PyObject *unicode; 541 const char *errors = NULL; 542 543 if (!PyArg_ParseTuple(args, "s*|z:ascii_decode", 544 &pbuf, &errors)) 545 return NULL; 546 547 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors); 548 PyBuffer_Release(&pbuf); 549 return codec_tuple(unicode, pbuf.len); 550} 551 552static PyObject * 553charmap_decode(PyObject *self, 554 PyObject *args) 555{ 556 Py_buffer pbuf; 557 PyObject *unicode; 558 const char *errors = NULL; 559 PyObject *mapping = NULL; 560 561 if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode", 562 &pbuf, &errors, &mapping)) 563 return NULL; 564 if (mapping == Py_None) 565 mapping = NULL; 566 567 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors); 568 PyBuffer_Release(&pbuf); 569 return codec_tuple(unicode, pbuf.len); 570} 571 572#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) 573 574static PyObject * 575mbcs_decode(PyObject *self, 576 PyObject *args) 577{ 578 Py_buffer pbuf; 579 const char *errors = NULL; 580 int final = 0; 581 Py_ssize_t consumed; 582 PyObject *decoded = NULL; 583 584 if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode", 585 &pbuf, &errors, &final)) 586 return NULL; 587 consumed = pbuf.len; 588 589 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors, 590 final ? NULL : &consumed); 591 PyBuffer_Release(&pbuf); 592 if (decoded == NULL) 593 return NULL; 594 return codec_tuple(decoded, consumed); 595} 596 597#endif /* MS_WINDOWS */ 598 599/* --- Encoder ------------------------------------------------------------ */ 600 601static PyObject * 602readbuffer_encode(PyObject *self, 603 PyObject *args) 604{ 605 const char *data; 606 Py_ssize_t size; 607 const char *errors = NULL; 608 609 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode", 610 &data, &size, &errors)) 611 return NULL; 612 613 return codec_tuple(PyString_FromStringAndSize(data, size), 614 size); 615} 616 617static PyObject * 618charbuffer_encode(PyObject *self, 619 PyObject *args) 620{ 621 const char *data; 622 Py_ssize_t size; 623 const char *errors = NULL; 624 625 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode", 626 &data, &size, &errors)) 627 return NULL; 628 629 return codec_tuple(PyString_FromStringAndSize(data, size), 630 size); 631} 632 633static PyObject * 634unicode_internal_encode(PyObject *self, 635 PyObject *args) 636{ 637 PyObject *obj; 638 const char *errors = NULL; 639 const char *data; 640 Py_ssize_t size; 641 642 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode", 643 &obj, &errors)) 644 return NULL; 645 646 if (PyUnicode_Check(obj)) { 647 data = PyUnicode_AS_DATA(obj); 648 size = PyUnicode_GET_DATA_SIZE(obj); 649 return codec_tuple(PyString_FromStringAndSize(data, size), 650 PyUnicode_GET_SIZE(obj)); 651 } 652 else { 653 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size)) 654 return NULL; 655 return codec_tuple(PyString_FromStringAndSize(data, size), 656 size); 657 } 658} 659 660static PyObject * 661utf_7_encode(PyObject *self, 662 PyObject *args) 663{ 664 PyObject *str, *v; 665 const char *errors = NULL; 666 667 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode", 668 &str, &errors)) 669 return NULL; 670 671 str = PyUnicode_FromObject(str); 672 if (str == NULL) 673 return NULL; 674 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str), 675 PyUnicode_GET_SIZE(str), 676 0, 677 0, 678 errors), 679 PyUnicode_GET_SIZE(str)); 680 Py_DECREF(str); 681 return v; 682} 683 684static PyObject * 685utf_8_encode(PyObject *self, 686 PyObject *args) 687{ 688 PyObject *str, *v; 689 const char *errors = NULL; 690 691 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode", 692 &str, &errors)) 693 return NULL; 694 695 str = PyUnicode_FromObject(str); 696 if (str == NULL) 697 return NULL; 698 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str), 699 PyUnicode_GET_SIZE(str), 700 errors), 701 PyUnicode_GET_SIZE(str)); 702 Py_DECREF(str); 703 return v; 704} 705 706/* This version provides access to the byteorder parameter of the 707 builtin UTF-16 codecs as optional third argument. It defaults to 0 708 which means: use the native byte order and prepend the data with a 709 BOM mark. 710 711*/ 712 713static PyObject * 714utf_16_encode(PyObject *self, 715 PyObject *args) 716{ 717 PyObject *str, *v; 718 const char *errors = NULL; 719 int byteorder = 0; 720 721 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode", 722 &str, &errors, &byteorder)) 723 return NULL; 724 725 str = PyUnicode_FromObject(str); 726 if (str == NULL) 727 return NULL; 728 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), 729 PyUnicode_GET_SIZE(str), 730 errors, 731 byteorder), 732 PyUnicode_GET_SIZE(str)); 733 Py_DECREF(str); 734 return v; 735} 736 737static PyObject * 738utf_16_le_encode(PyObject *self, 739 PyObject *args) 740{ 741 PyObject *str, *v; 742 const char *errors = NULL; 743 744 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode", 745 &str, &errors)) 746 return NULL; 747 748 str = PyUnicode_FromObject(str); 749 if (str == NULL) 750 return NULL; 751 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), 752 PyUnicode_GET_SIZE(str), 753 errors, 754 -1), 755 PyUnicode_GET_SIZE(str)); 756 Py_DECREF(str); 757 return v; 758} 759 760static PyObject * 761utf_16_be_encode(PyObject *self, 762 PyObject *args) 763{ 764 PyObject *str, *v; 765 const char *errors = NULL; 766 767 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode", 768 &str, &errors)) 769 return NULL; 770 771 str = PyUnicode_FromObject(str); 772 if (str == NULL) 773 return NULL; 774 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str), 775 PyUnicode_GET_SIZE(str), 776 errors, 777 +1), 778 PyUnicode_GET_SIZE(str)); 779 Py_DECREF(str); 780 return v; 781} 782 783/* This version provides access to the byteorder parameter of the 784 builtin UTF-32 codecs as optional third argument. It defaults to 0 785 which means: use the native byte order and prepend the data with a 786 BOM mark. 787 788*/ 789 790static PyObject * 791utf_32_encode(PyObject *self, 792 PyObject *args) 793{ 794 PyObject *str, *v; 795 const char *errors = NULL; 796 int byteorder = 0; 797 798 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode", 799 &str, &errors, &byteorder)) 800 return NULL; 801 802 str = PyUnicode_FromObject(str); 803 if (str == NULL) 804 return NULL; 805 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), 806 PyUnicode_GET_SIZE(str), 807 errors, 808 byteorder), 809 PyUnicode_GET_SIZE(str)); 810 Py_DECREF(str); 811 return v; 812} 813 814static PyObject * 815utf_32_le_encode(PyObject *self, 816 PyObject *args) 817{ 818 PyObject *str, *v; 819 const char *errors = NULL; 820 821 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode", 822 &str, &errors)) 823 return NULL; 824 825 str = PyUnicode_FromObject(str); 826 if (str == NULL) 827 return NULL; 828 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), 829 PyUnicode_GET_SIZE(str), 830 errors, 831 -1), 832 PyUnicode_GET_SIZE(str)); 833 Py_DECREF(str); 834 return v; 835} 836 837static PyObject * 838utf_32_be_encode(PyObject *self, 839 PyObject *args) 840{ 841 PyObject *str, *v; 842 const char *errors = NULL; 843 844 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode", 845 &str, &errors)) 846 return NULL; 847 848 str = PyUnicode_FromObject(str); 849 if (str == NULL) 850 return NULL; 851 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str), 852 PyUnicode_GET_SIZE(str), 853 errors, 854 +1), 855 PyUnicode_GET_SIZE(str)); 856 Py_DECREF(str); 857 return v; 858} 859 860static PyObject * 861unicode_escape_encode(PyObject *self, 862 PyObject *args) 863{ 864 PyObject *str, *v; 865 const char *errors = NULL; 866 867 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode", 868 &str, &errors)) 869 return NULL; 870 871 str = PyUnicode_FromObject(str); 872 if (str == NULL) 873 return NULL; 874 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str), 875 PyUnicode_GET_SIZE(str)), 876 PyUnicode_GET_SIZE(str)); 877 Py_DECREF(str); 878 return v; 879} 880 881static PyObject * 882raw_unicode_escape_encode(PyObject *self, 883 PyObject *args) 884{ 885 PyObject *str, *v; 886 const char *errors = NULL; 887 888 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode", 889 &str, &errors)) 890 return NULL; 891 892 str = PyUnicode_FromObject(str); 893 if (str == NULL) 894 return NULL; 895 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape( 896 PyUnicode_AS_UNICODE(str), 897 PyUnicode_GET_SIZE(str)), 898 PyUnicode_GET_SIZE(str)); 899 Py_DECREF(str); 900 return v; 901} 902 903static PyObject * 904latin_1_encode(PyObject *self, 905 PyObject *args) 906{ 907 PyObject *str, *v; 908 const char *errors = NULL; 909 910 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode", 911 &str, &errors)) 912 return NULL; 913 914 str = PyUnicode_FromObject(str); 915 if (str == NULL) 916 return NULL; 917 v = codec_tuple(PyUnicode_EncodeLatin1( 918 PyUnicode_AS_UNICODE(str), 919 PyUnicode_GET_SIZE(str), 920 errors), 921 PyUnicode_GET_SIZE(str)); 922 Py_DECREF(str); 923 return v; 924} 925 926static PyObject * 927ascii_encode(PyObject *self, 928 PyObject *args) 929{ 930 PyObject *str, *v; 931 const char *errors = NULL; 932 933 if (!PyArg_ParseTuple(args, "O|z:ascii_encode", 934 &str, &errors)) 935 return NULL; 936 937 str = PyUnicode_FromObject(str); 938 if (str == NULL) 939 return NULL; 940 v = codec_tuple(PyUnicode_EncodeASCII( 941 PyUnicode_AS_UNICODE(str), 942 PyUnicode_GET_SIZE(str), 943 errors), 944 PyUnicode_GET_SIZE(str)); 945 Py_DECREF(str); 946 return v; 947} 948 949static PyObject * 950charmap_encode(PyObject *self, 951 PyObject *args) 952{ 953 PyObject *str, *v; 954 const char *errors = NULL; 955 PyObject *mapping = NULL; 956 957 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode", 958 &str, &errors, &mapping)) 959 return NULL; 960 if (mapping == Py_None) 961 mapping = NULL; 962 963 str = PyUnicode_FromObject(str); 964 if (str == NULL) 965 return NULL; 966 v = codec_tuple(PyUnicode_EncodeCharmap( 967 PyUnicode_AS_UNICODE(str), 968 PyUnicode_GET_SIZE(str), 969 mapping, 970 errors), 971 PyUnicode_GET_SIZE(str)); 972 Py_DECREF(str); 973 return v; 974} 975 976static PyObject* 977charmap_build(PyObject *self, PyObject *args) 978{ 979 PyObject *map; 980 if (!PyArg_ParseTuple(args, "U:charmap_build", &map)) 981 return NULL; 982 return PyUnicode_BuildEncodingMap(map); 983} 984 985#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) 986 987static PyObject * 988mbcs_encode(PyObject *self, 989 PyObject *args) 990{ 991 PyObject *str, *v; 992 const char *errors = NULL; 993 994 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode", 995 &str, &errors)) 996 return NULL; 997 998 str = PyUnicode_FromObject(str); 999 if (str == NULL) 1000 return NULL; 1001 v = codec_tuple(PyUnicode_EncodeMBCS( 1002 PyUnicode_AS_UNICODE(str), 1003 PyUnicode_GET_SIZE(str), 1004 errors), 1005 PyUnicode_GET_SIZE(str)); 1006 Py_DECREF(str); 1007 return v; 1008} 1009 1010#endif /* MS_WINDOWS */ 1011#endif /* Py_USING_UNICODE */ 1012 1013/* --- Error handler registry --------------------------------------------- */ 1014 1015PyDoc_STRVAR(register_error__doc__, 1016"register_error(errors, handler)\n\ 1017\n\ 1018Register the specified error handler under the name\n\ 1019errors. handler must be a callable object, that\n\ 1020will be called with an exception instance containing\n\ 1021information about the location of the encoding/decoding\n\ 1022error and must return a (replacement, new position) tuple."); 1023 1024static PyObject *register_error(PyObject *self, PyObject *args) 1025{ 1026 const char *name; 1027 PyObject *handler; 1028 1029 if (!PyArg_ParseTuple(args, "sO:register_error", 1030 &name, &handler)) 1031 return NULL; 1032 if (PyCodec_RegisterError(name, handler)) 1033 return NULL; 1034 Py_RETURN_NONE; 1035} 1036 1037PyDoc_STRVAR(lookup_error__doc__, 1038"lookup_error(errors) -> handler\n\ 1039\n\ 1040Return the error handler for the specified error handling name\n\ 1041or raise a LookupError, if no handler exists under this name."); 1042 1043static PyObject *lookup_error(PyObject *self, PyObject *args) 1044{ 1045 const char *name; 1046 1047 if (!PyArg_ParseTuple(args, "s:lookup_error", 1048 &name)) 1049 return NULL; 1050 return PyCodec_LookupError(name); 1051} 1052 1053/* --- Module API --------------------------------------------------------- */ 1054 1055static PyMethodDef _codecs_functions[] = { 1056 {"register", codec_register, METH_O, 1057 register__doc__}, 1058 {"lookup", codec_lookup, METH_VARARGS, 1059 lookup__doc__}, 1060 {"encode", codec_encode, METH_VARARGS, 1061 encode__doc__}, 1062 {"decode", codec_decode, METH_VARARGS, 1063 decode__doc__}, 1064 {"escape_encode", escape_encode, METH_VARARGS}, 1065 {"escape_decode", escape_decode, METH_VARARGS}, 1066#ifdef Py_USING_UNICODE 1067 {"utf_8_encode", utf_8_encode, METH_VARARGS}, 1068 {"utf_8_decode", utf_8_decode, METH_VARARGS}, 1069 {"utf_7_encode", utf_7_encode, METH_VARARGS}, 1070 {"utf_7_decode", utf_7_decode, METH_VARARGS}, 1071 {"utf_16_encode", utf_16_encode, METH_VARARGS}, 1072 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS}, 1073 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS}, 1074 {"utf_16_decode", utf_16_decode, METH_VARARGS}, 1075 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS}, 1076 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS}, 1077 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS}, 1078 {"utf_32_encode", utf_32_encode, METH_VARARGS}, 1079 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS}, 1080 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS}, 1081 {"utf_32_decode", utf_32_decode, METH_VARARGS}, 1082 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS}, 1083 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS}, 1084 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS}, 1085 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS}, 1086 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS}, 1087 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS}, 1088 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS}, 1089 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS}, 1090 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS}, 1091 {"latin_1_encode", latin_1_encode, METH_VARARGS}, 1092 {"latin_1_decode", latin_1_decode, METH_VARARGS}, 1093 {"ascii_encode", ascii_encode, METH_VARARGS}, 1094 {"ascii_decode", ascii_decode, METH_VARARGS}, 1095 {"charmap_encode", charmap_encode, METH_VARARGS}, 1096 {"charmap_decode", charmap_decode, METH_VARARGS}, 1097 {"charmap_build", charmap_build, METH_VARARGS}, 1098 {"readbuffer_encode", readbuffer_encode, METH_VARARGS}, 1099 {"charbuffer_encode", charbuffer_encode, METH_VARARGS}, 1100#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) 1101 {"mbcs_encode", mbcs_encode, METH_VARARGS}, 1102 {"mbcs_decode", mbcs_decode, METH_VARARGS}, 1103#endif 1104#endif /* Py_USING_UNICODE */ 1105 {"register_error", register_error, METH_VARARGS, 1106 register_error__doc__}, 1107 {"lookup_error", lookup_error, METH_VARARGS, 1108 lookup_error__doc__}, 1109 {NULL, NULL} /* sentinel */ 1110}; 1111 1112PyMODINIT_FUNC 1113init_codecs(void) 1114{ 1115 Py_InitModule("_codecs", _codecs_functions); 1116} 1117