codecs.c revision aacfcccdc39b074521d3e5d4b5a1b1e020662366
1/* ------------------------------------------------------------------------ 2 3 Python Codec Registry and support functions 4 5Written by Marc-Andre Lemburg (mal@lemburg.com). 6 7Copyright (c) Corporation for National Research Initiatives. 8 9 ------------------------------------------------------------------------ */ 10 11#include "Python.h" 12#include "ucnhash.h" 13#include <ctype.h> 14 15const char *Py_hexdigits = "0123456789abcdef"; 16 17/* --- Codec Registry ----------------------------------------------------- */ 18 19/* Import the standard encodings package which will register the first 20 codec search function. 21 22 This is done in a lazy way so that the Unicode implementation does 23 not downgrade startup time of scripts not needing it. 24 25 ImportErrors are silently ignored by this function. Only one try is 26 made. 27 28*/ 29 30static int _PyCodecRegistry_Init(void); /* Forward */ 31 32int PyCodec_Register(PyObject *search_function) 33{ 34 PyInterpreterState *interp = PyThreadState_GET()->interp; 35 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) 36 goto onError; 37 if (search_function == NULL) { 38 PyErr_BadArgument(); 39 goto onError; 40 } 41 if (!PyCallable_Check(search_function)) { 42 PyErr_SetString(PyExc_TypeError, "argument must be callable"); 43 goto onError; 44 } 45 return PyList_Append(interp->codec_search_path, search_function); 46 47 onError: 48 return -1; 49} 50 51/* Convert a string to a normalized Python string: all characters are 52 converted to lower case, spaces are replaced with underscores. */ 53 54static 55PyObject *normalizestring(const char *string) 56{ 57 size_t i; 58 size_t len = strlen(string); 59 char *p; 60 PyObject *v; 61 62 if (len > PY_SSIZE_T_MAX) { 63 PyErr_SetString(PyExc_OverflowError, "string is too large"); 64 return NULL; 65 } 66 67 p = PyMem_Malloc(len + 1); 68 if (p == NULL) 69 return PyErr_NoMemory(); 70 for (i = 0; i < len; i++) { 71 char ch = string[i]; 72 if (ch == ' ') 73 ch = '-'; 74 else 75 ch = Py_TOLOWER(Py_CHARMASK(ch)); 76 p[i] = ch; 77 } 78 p[i] = '\0'; 79 v = PyUnicode_FromString(p); 80 if (v == NULL) 81 return NULL; 82 PyMem_Free(p); 83 return v; 84} 85 86/* Lookup the given encoding and return a tuple providing the codec 87 facilities. 88 89 The encoding string is looked up converted to all lower-case 90 characters. This makes encodings looked up through this mechanism 91 effectively case-insensitive. 92 93 If no codec is found, a LookupError is set and NULL returned. 94 95 As side effect, this tries to load the encodings package, if not 96 yet done. This is part of the lazy load strategy for the encodings 97 package. 98 99*/ 100 101PyObject *_PyCodec_Lookup(const char *encoding) 102{ 103 PyInterpreterState *interp; 104 PyObject *result, *args = NULL, *v; 105 Py_ssize_t i, len; 106 107 if (encoding == NULL) { 108 PyErr_BadArgument(); 109 goto onError; 110 } 111 112 interp = PyThreadState_GET()->interp; 113 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) 114 goto onError; 115 116 /* Convert the encoding to a normalized Python string: all 117 characters are converted to lower case, spaces and hyphens are 118 replaced with underscores. */ 119 v = normalizestring(encoding); 120 if (v == NULL) 121 goto onError; 122 PyUnicode_InternInPlace(&v); 123 124 /* First, try to lookup the name in the registry dictionary */ 125 result = PyDict_GetItem(interp->codec_search_cache, v); 126 if (result != NULL) { 127 Py_INCREF(result); 128 Py_DECREF(v); 129 return result; 130 } 131 132 /* Next, scan the search functions in order of registration */ 133 args = PyTuple_New(1); 134 if (args == NULL) 135 goto onError; 136 PyTuple_SET_ITEM(args,0,v); 137 138 len = PyList_Size(interp->codec_search_path); 139 if (len < 0) 140 goto onError; 141 if (len == 0) { 142 PyErr_SetString(PyExc_LookupError, 143 "no codec search functions registered: " 144 "can't find encoding"); 145 goto onError; 146 } 147 148 for (i = 0; i < len; i++) { 149 PyObject *func; 150 151 func = PyList_GetItem(interp->codec_search_path, i); 152 if (func == NULL) 153 goto onError; 154 result = PyEval_CallObject(func, args); 155 if (result == NULL) 156 goto onError; 157 if (result == Py_None) { 158 Py_DECREF(result); 159 continue; 160 } 161 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) { 162 PyErr_SetString(PyExc_TypeError, 163 "codec search functions must return 4-tuples"); 164 Py_DECREF(result); 165 goto onError; 166 } 167 break; 168 } 169 if (i == len) { 170 /* XXX Perhaps we should cache misses too ? */ 171 PyErr_Format(PyExc_LookupError, 172 "unknown encoding: %s", encoding); 173 goto onError; 174 } 175 176 /* Cache and return the result */ 177 if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) { 178 Py_DECREF(result); 179 goto onError; 180 } 181 Py_DECREF(args); 182 return result; 183 184 onError: 185 Py_XDECREF(args); 186 return NULL; 187} 188 189int _PyCodec_Forget(const char *encoding) 190{ 191 PyInterpreterState *interp; 192 PyObject *v; 193 int result; 194 195 interp = PyThreadState_GET()->interp; 196 if (interp->codec_search_path == NULL) { 197 return -1; 198 } 199 200 /* Convert the encoding to a normalized Python string: all 201 characters are converted to lower case, spaces and hyphens are 202 replaced with underscores. */ 203 v = normalizestring(encoding); 204 if (v == NULL) { 205 return -1; 206 } 207 208 /* Drop the named codec from the internal cache */ 209 result = PyDict_DelItem(interp->codec_search_cache, v); 210 Py_DECREF(v); 211 212 return result; 213} 214 215/* Codec registry encoding check API. */ 216 217int PyCodec_KnownEncoding(const char *encoding) 218{ 219 PyObject *codecs; 220 221 codecs = _PyCodec_Lookup(encoding); 222 if (!codecs) { 223 PyErr_Clear(); 224 return 0; 225 } 226 else { 227 Py_DECREF(codecs); 228 return 1; 229 } 230} 231 232static 233PyObject *args_tuple(PyObject *object, 234 const char *errors) 235{ 236 PyObject *args; 237 238 args = PyTuple_New(1 + (errors != NULL)); 239 if (args == NULL) 240 return NULL; 241 Py_INCREF(object); 242 PyTuple_SET_ITEM(args,0,object); 243 if (errors) { 244 PyObject *v; 245 246 v = PyUnicode_FromString(errors); 247 if (v == NULL) { 248 Py_DECREF(args); 249 return NULL; 250 } 251 PyTuple_SET_ITEM(args, 1, v); 252 } 253 return args; 254} 255 256/* Helper function to get a codec item */ 257 258static 259PyObject *codec_getitem(const char *encoding, int index) 260{ 261 PyObject *codecs; 262 PyObject *v; 263 264 codecs = _PyCodec_Lookup(encoding); 265 if (codecs == NULL) 266 return NULL; 267 v = PyTuple_GET_ITEM(codecs, index); 268 Py_DECREF(codecs); 269 Py_INCREF(v); 270 return v; 271} 272 273/* Helper functions to create an incremental codec. */ 274static 275PyObject *codec_makeincrementalcodec(PyObject *codec_info, 276 const char *errors, 277 const char *attrname) 278{ 279 PyObject *ret, *inccodec; 280 281 inccodec = PyObject_GetAttrString(codec_info, attrname); 282 if (inccodec == NULL) 283 return NULL; 284 if (errors) 285 ret = PyObject_CallFunction(inccodec, "s", errors); 286 else 287 ret = PyObject_CallFunction(inccodec, NULL); 288 Py_DECREF(inccodec); 289 return ret; 290} 291 292static 293PyObject *codec_getincrementalcodec(const char *encoding, 294 const char *errors, 295 const char *attrname) 296{ 297 PyObject *codec_info, *ret; 298 299 codec_info = _PyCodec_Lookup(encoding); 300 if (codec_info == NULL) 301 return NULL; 302 ret = codec_makeincrementalcodec(codec_info, errors, attrname); 303 Py_DECREF(codec_info); 304 return ret; 305} 306 307/* Helper function to create a stream codec. */ 308 309static 310PyObject *codec_getstreamcodec(const char *encoding, 311 PyObject *stream, 312 const char *errors, 313 const int index) 314{ 315 PyObject *codecs, *streamcodec, *codeccls; 316 317 codecs = _PyCodec_Lookup(encoding); 318 if (codecs == NULL) 319 return NULL; 320 321 codeccls = PyTuple_GET_ITEM(codecs, index); 322 if (errors != NULL) 323 streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors); 324 else 325 streamcodec = PyObject_CallFunction(codeccls, "O", stream); 326 Py_DECREF(codecs); 327 return streamcodec; 328} 329 330/* Helpers to work with the result of _PyCodec_Lookup 331 332 */ 333PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info, 334 const char *errors) 335{ 336 return codec_makeincrementalcodec(codec_info, errors, 337 "incrementaldecoder"); 338} 339 340PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info, 341 const char *errors) 342{ 343 return codec_makeincrementalcodec(codec_info, errors, 344 "incrementalencoder"); 345} 346 347 348/* Convenience APIs to query the Codec registry. 349 350 All APIs return a codec object with incremented refcount. 351 352 */ 353 354PyObject *PyCodec_Encoder(const char *encoding) 355{ 356 return codec_getitem(encoding, 0); 357} 358 359PyObject *PyCodec_Decoder(const char *encoding) 360{ 361 return codec_getitem(encoding, 1); 362} 363 364PyObject *PyCodec_IncrementalEncoder(const char *encoding, 365 const char *errors) 366{ 367 return codec_getincrementalcodec(encoding, errors, "incrementalencoder"); 368} 369 370PyObject *PyCodec_IncrementalDecoder(const char *encoding, 371 const char *errors) 372{ 373 return codec_getincrementalcodec(encoding, errors, "incrementaldecoder"); 374} 375 376PyObject *PyCodec_StreamReader(const char *encoding, 377 PyObject *stream, 378 const char *errors) 379{ 380 return codec_getstreamcodec(encoding, stream, errors, 2); 381} 382 383PyObject *PyCodec_StreamWriter(const char *encoding, 384 PyObject *stream, 385 const char *errors) 386{ 387 return codec_getstreamcodec(encoding, stream, errors, 3); 388} 389 390/* Helper that tries to ensure the reported exception chain indicates the 391 * codec that was invoked to trigger the failure without changing the type 392 * of the exception raised. 393 */ 394static void 395wrap_codec_error(const char *operation, 396 const char *encoding) 397{ 398 /* TrySetFromCause will replace the active exception with a suitably 399 * updated clone if it can, otherwise it will leave the original 400 * exception alone. 401 */ 402 _PyErr_TrySetFromCause("%s with '%s' codec failed", 403 operation, encoding); 404} 405 406/* Encode an object (e.g. an Unicode object) using the given encoding 407 and return the resulting encoded object (usually a Python string). 408 409 errors is passed to the encoder factory as argument if non-NULL. */ 410 411static PyObject * 412_PyCodec_EncodeInternal(PyObject *object, 413 PyObject *encoder, 414 const char *encoding, 415 const char *errors) 416{ 417 PyObject *args = NULL, *result = NULL; 418 PyObject *v = NULL; 419 420 args = args_tuple(object, errors); 421 if (args == NULL) 422 goto onError; 423 424 result = PyEval_CallObject(encoder, args); 425 if (result == NULL) { 426 wrap_codec_error("encoding", encoding); 427 goto onError; 428 } 429 430 if (!PyTuple_Check(result) || 431 PyTuple_GET_SIZE(result) != 2) { 432 PyErr_SetString(PyExc_TypeError, 433 "encoder must return a tuple (object, integer)"); 434 goto onError; 435 } 436 v = PyTuple_GET_ITEM(result,0); 437 Py_INCREF(v); 438 /* We don't check or use the second (integer) entry. */ 439 440 Py_DECREF(args); 441 Py_DECREF(encoder); 442 Py_DECREF(result); 443 return v; 444 445 onError: 446 Py_XDECREF(result); 447 Py_XDECREF(args); 448 Py_XDECREF(encoder); 449 return NULL; 450} 451 452/* Decode an object (usually a Python string) using the given encoding 453 and return an equivalent object (e.g. an Unicode object). 454 455 errors is passed to the decoder factory as argument if non-NULL. */ 456 457static PyObject * 458_PyCodec_DecodeInternal(PyObject *object, 459 PyObject *decoder, 460 const char *encoding, 461 const char *errors) 462{ 463 PyObject *args = NULL, *result = NULL; 464 PyObject *v; 465 466 args = args_tuple(object, errors); 467 if (args == NULL) 468 goto onError; 469 470 result = PyEval_CallObject(decoder,args); 471 if (result == NULL) { 472 wrap_codec_error("decoding", encoding); 473 goto onError; 474 } 475 if (!PyTuple_Check(result) || 476 PyTuple_GET_SIZE(result) != 2) { 477 PyErr_SetString(PyExc_TypeError, 478 "decoder must return a tuple (object,integer)"); 479 goto onError; 480 } 481 v = PyTuple_GET_ITEM(result,0); 482 Py_INCREF(v); 483 /* We don't check or use the second (integer) entry. */ 484 485 Py_DECREF(args); 486 Py_DECREF(decoder); 487 Py_DECREF(result); 488 return v; 489 490 onError: 491 Py_XDECREF(args); 492 Py_XDECREF(decoder); 493 Py_XDECREF(result); 494 return NULL; 495} 496 497/* Generic encoding/decoding API */ 498PyObject *PyCodec_Encode(PyObject *object, 499 const char *encoding, 500 const char *errors) 501{ 502 PyObject *encoder; 503 504 encoder = PyCodec_Encoder(encoding); 505 if (encoder == NULL) 506 return NULL; 507 508 return _PyCodec_EncodeInternal(object, encoder, encoding, errors); 509} 510 511PyObject *PyCodec_Decode(PyObject *object, 512 const char *encoding, 513 const char *errors) 514{ 515 PyObject *decoder; 516 517 decoder = PyCodec_Decoder(encoding); 518 if (decoder == NULL) 519 return NULL; 520 521 return _PyCodec_DecodeInternal(object, decoder, encoding, errors); 522} 523 524/* Text encoding/decoding API */ 525PyObject * _PyCodec_LookupTextEncoding(const char *encoding, 526 const char *alternate_command) 527{ 528 _Py_IDENTIFIER(_is_text_encoding); 529 PyObject *codec; 530 PyObject *attr; 531 int is_text_codec; 532 533 codec = _PyCodec_Lookup(encoding); 534 if (codec == NULL) 535 return NULL; 536 537 /* Backwards compatibility: assume any raw tuple describes a text 538 * encoding, and the same for anything lacking the private 539 * attribute. 540 */ 541 if (!PyTuple_CheckExact(codec)) { 542 attr = _PyObject_GetAttrId(codec, &PyId__is_text_encoding); 543 if (attr == NULL) { 544 if (PyErr_ExceptionMatches(PyExc_AttributeError)) { 545 PyErr_Clear(); 546 } else { 547 Py_DECREF(codec); 548 return NULL; 549 } 550 } else { 551 is_text_codec = PyObject_IsTrue(attr); 552 Py_DECREF(attr); 553 if (!is_text_codec) { 554 Py_DECREF(codec); 555 PyErr_Format(PyExc_LookupError, 556 "'%.400s' is not a text encoding; " 557 "use %s to handle arbitrary codecs", 558 encoding, alternate_command); 559 return NULL; 560 } 561 } 562 } 563 564 /* This appears to be a valid text encoding */ 565 return codec; 566} 567 568 569static 570PyObject *codec_getitem_checked(const char *encoding, 571 const char *alternate_command, 572 int index) 573{ 574 PyObject *codec; 575 PyObject *v; 576 577 codec = _PyCodec_LookupTextEncoding(encoding, alternate_command); 578 if (codec == NULL) 579 return NULL; 580 581 v = PyTuple_GET_ITEM(codec, index); 582 Py_INCREF(v); 583 Py_DECREF(codec); 584 return v; 585} 586 587static PyObject * _PyCodec_TextEncoder(const char *encoding) 588{ 589 return codec_getitem_checked(encoding, "codecs.encode()", 0); 590} 591 592static PyObject * _PyCodec_TextDecoder(const char *encoding) 593{ 594 return codec_getitem_checked(encoding, "codecs.decode()", 1); 595} 596 597PyObject *_PyCodec_EncodeText(PyObject *object, 598 const char *encoding, 599 const char *errors) 600{ 601 PyObject *encoder; 602 603 encoder = _PyCodec_TextEncoder(encoding); 604 if (encoder == NULL) 605 return NULL; 606 607 return _PyCodec_EncodeInternal(object, encoder, encoding, errors); 608} 609 610PyObject *_PyCodec_DecodeText(PyObject *object, 611 const char *encoding, 612 const char *errors) 613{ 614 PyObject *decoder; 615 616 decoder = _PyCodec_TextDecoder(encoding); 617 if (decoder == NULL) 618 return NULL; 619 620 return _PyCodec_DecodeInternal(object, decoder, encoding, errors); 621} 622 623/* Register the error handling callback function error under the name 624 name. This function will be called by the codec when it encounters 625 an unencodable characters/undecodable bytes and doesn't know the 626 callback name, when name is specified as the error parameter 627 in the call to the encode/decode function. 628 Return 0 on success, -1 on error */ 629int PyCodec_RegisterError(const char *name, PyObject *error) 630{ 631 PyInterpreterState *interp = PyThreadState_GET()->interp; 632 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) 633 return -1; 634 if (!PyCallable_Check(error)) { 635 PyErr_SetString(PyExc_TypeError, "handler must be callable"); 636 return -1; 637 } 638 return PyDict_SetItemString(interp->codec_error_registry, 639 name, error); 640} 641 642/* Lookup the error handling callback function registered under the 643 name error. As a special case NULL can be passed, in which case 644 the error handling callback for strict encoding will be returned. */ 645PyObject *PyCodec_LookupError(const char *name) 646{ 647 PyObject *handler = NULL; 648 649 PyInterpreterState *interp = PyThreadState_GET()->interp; 650 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) 651 return NULL; 652 653 if (name==NULL) 654 name = "strict"; 655 handler = PyDict_GetItemString(interp->codec_error_registry, name); 656 if (!handler) 657 PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name); 658 else 659 Py_INCREF(handler); 660 return handler; 661} 662 663static void wrong_exception_type(PyObject *exc) 664{ 665 _Py_IDENTIFIER(__class__); 666 _Py_IDENTIFIER(__name__); 667 PyObject *type = _PyObject_GetAttrId(exc, &PyId___class__); 668 if (type != NULL) { 669 PyObject *name = _PyObject_GetAttrId(type, &PyId___name__); 670 Py_DECREF(type); 671 if (name != NULL) { 672 PyErr_Format(PyExc_TypeError, 673 "don't know how to handle %S in error callback", name); 674 Py_DECREF(name); 675 } 676 } 677} 678 679PyObject *PyCodec_StrictErrors(PyObject *exc) 680{ 681 if (PyExceptionInstance_Check(exc)) 682 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); 683 else 684 PyErr_SetString(PyExc_TypeError, "codec must pass exception instance"); 685 return NULL; 686} 687 688 689PyObject *PyCodec_IgnoreErrors(PyObject *exc) 690{ 691 Py_ssize_t end; 692 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 693 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 694 return NULL; 695 } 696 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { 697 if (PyUnicodeDecodeError_GetEnd(exc, &end)) 698 return NULL; 699 } 700 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { 701 if (PyUnicodeTranslateError_GetEnd(exc, &end)) 702 return NULL; 703 } 704 else { 705 wrong_exception_type(exc); 706 return NULL; 707 } 708 return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end); 709} 710 711 712PyObject *PyCodec_ReplaceErrors(PyObject *exc) 713{ 714 Py_ssize_t start, end, i, len; 715 716 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 717 PyObject *res; 718 int kind; 719 void *data; 720 if (PyUnicodeEncodeError_GetStart(exc, &start)) 721 return NULL; 722 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 723 return NULL; 724 len = end - start; 725 res = PyUnicode_New(len, '?'); 726 if (res == NULL) 727 return NULL; 728 kind = PyUnicode_KIND(res); 729 data = PyUnicode_DATA(res); 730 for (i = 0; i < len; ++i) 731 PyUnicode_WRITE(kind, data, i, '?'); 732 assert(_PyUnicode_CheckConsistency(res, 1)); 733 return Py_BuildValue("(Nn)", res, end); 734 } 735 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { 736 if (PyUnicodeDecodeError_GetEnd(exc, &end)) 737 return NULL; 738 return Py_BuildValue("(Cn)", 739 (int)Py_UNICODE_REPLACEMENT_CHARACTER, 740 end); 741 } 742 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { 743 PyObject *res; 744 int kind; 745 void *data; 746 if (PyUnicodeTranslateError_GetStart(exc, &start)) 747 return NULL; 748 if (PyUnicodeTranslateError_GetEnd(exc, &end)) 749 return NULL; 750 len = end - start; 751 res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER); 752 if (res == NULL) 753 return NULL; 754 kind = PyUnicode_KIND(res); 755 data = PyUnicode_DATA(res); 756 for (i=0; i < len; i++) 757 PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER); 758 assert(_PyUnicode_CheckConsistency(res, 1)); 759 return Py_BuildValue("(Nn)", res, end); 760 } 761 else { 762 wrong_exception_type(exc); 763 return NULL; 764 } 765} 766 767PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) 768{ 769 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 770 PyObject *restuple; 771 PyObject *object; 772 Py_ssize_t i; 773 Py_ssize_t start; 774 Py_ssize_t end; 775 PyObject *res; 776 unsigned char *outp; 777 Py_ssize_t ressize; 778 Py_UCS4 ch; 779 if (PyUnicodeEncodeError_GetStart(exc, &start)) 780 return NULL; 781 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 782 return NULL; 783 if (!(object = PyUnicodeEncodeError_GetObject(exc))) 784 return NULL; 785 if (end - start > PY_SSIZE_T_MAX / (2+7+1)) 786 end = start + PY_SSIZE_T_MAX / (2+7+1); 787 for (i = start, ressize = 0; i < end; ++i) { 788 /* object is guaranteed to be "ready" */ 789 ch = PyUnicode_READ_CHAR(object, i); 790 if (ch<10) 791 ressize += 2+1+1; 792 else if (ch<100) 793 ressize += 2+2+1; 794 else if (ch<1000) 795 ressize += 2+3+1; 796 else if (ch<10000) 797 ressize += 2+4+1; 798 else if (ch<100000) 799 ressize += 2+5+1; 800 else if (ch<1000000) 801 ressize += 2+6+1; 802 else 803 ressize += 2+7+1; 804 } 805 /* allocate replacement */ 806 res = PyUnicode_New(ressize, 127); 807 if (res == NULL) { 808 Py_DECREF(object); 809 return NULL; 810 } 811 outp = PyUnicode_1BYTE_DATA(res); 812 /* generate replacement */ 813 for (i = start; i < end; ++i) { 814 int digits; 815 int base; 816 ch = PyUnicode_READ_CHAR(object, i); 817 *outp++ = '&'; 818 *outp++ = '#'; 819 if (ch<10) { 820 digits = 1; 821 base = 1; 822 } 823 else if (ch<100) { 824 digits = 2; 825 base = 10; 826 } 827 else if (ch<1000) { 828 digits = 3; 829 base = 100; 830 } 831 else if (ch<10000) { 832 digits = 4; 833 base = 1000; 834 } 835 else if (ch<100000) { 836 digits = 5; 837 base = 10000; 838 } 839 else if (ch<1000000) { 840 digits = 6; 841 base = 100000; 842 } 843 else { 844 digits = 7; 845 base = 1000000; 846 } 847 while (digits-->0) { 848 *outp++ = '0' + ch/base; 849 ch %= base; 850 base /= 10; 851 } 852 *outp++ = ';'; 853 } 854 assert(_PyUnicode_CheckConsistency(res, 1)); 855 restuple = Py_BuildValue("(Nn)", res, end); 856 Py_DECREF(object); 857 return restuple; 858 } 859 else { 860 wrong_exception_type(exc); 861 return NULL; 862 } 863} 864 865PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) 866{ 867 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 868 PyObject *restuple; 869 PyObject *object; 870 Py_ssize_t i; 871 Py_ssize_t start; 872 Py_ssize_t end; 873 PyObject *res; 874 unsigned char *outp; 875 Py_ssize_t ressize; 876 Py_UCS4 c; 877 if (PyUnicodeEncodeError_GetStart(exc, &start)) 878 return NULL; 879 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 880 return NULL; 881 if (!(object = PyUnicodeEncodeError_GetObject(exc))) 882 return NULL; 883 if (end - start > PY_SSIZE_T_MAX / (1+1+8)) 884 end = start + PY_SSIZE_T_MAX / (1+1+8); 885 for (i = start, ressize = 0; i < end; ++i) { 886 /* object is guaranteed to be "ready" */ 887 c = PyUnicode_READ_CHAR(object, i); 888 if (c >= 0x10000) { 889 ressize += 1+1+8; 890 } 891 else if (c >= 0x100) { 892 ressize += 1+1+4; 893 } 894 else 895 ressize += 1+1+2; 896 } 897 res = PyUnicode_New(ressize, 127); 898 if (res == NULL) { 899 Py_DECREF(object); 900 return NULL; 901 } 902 for (i = start, outp = PyUnicode_1BYTE_DATA(res); 903 i < end; ++i) { 904 c = PyUnicode_READ_CHAR(object, i); 905 *outp++ = '\\'; 906 if (c >= 0x00010000) { 907 *outp++ = 'U'; 908 *outp++ = Py_hexdigits[(c>>28)&0xf]; 909 *outp++ = Py_hexdigits[(c>>24)&0xf]; 910 *outp++ = Py_hexdigits[(c>>20)&0xf]; 911 *outp++ = Py_hexdigits[(c>>16)&0xf]; 912 *outp++ = Py_hexdigits[(c>>12)&0xf]; 913 *outp++ = Py_hexdigits[(c>>8)&0xf]; 914 } 915 else if (c >= 0x100) { 916 *outp++ = 'u'; 917 *outp++ = Py_hexdigits[(c>>12)&0xf]; 918 *outp++ = Py_hexdigits[(c>>8)&0xf]; 919 } 920 else 921 *outp++ = 'x'; 922 *outp++ = Py_hexdigits[(c>>4)&0xf]; 923 *outp++ = Py_hexdigits[c&0xf]; 924 } 925 926 assert(_PyUnicode_CheckConsistency(res, 1)); 927 restuple = Py_BuildValue("(Nn)", res, end); 928 Py_DECREF(object); 929 return restuple; 930 } 931 else { 932 wrong_exception_type(exc); 933 return NULL; 934 } 935} 936 937static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; 938static int ucnhash_initialized = 0; 939 940PyObject *PyCodec_NameReplaceErrors(PyObject *exc) 941{ 942 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 943 PyObject *restuple; 944 PyObject *object; 945 Py_ssize_t i; 946 Py_ssize_t start; 947 Py_ssize_t end; 948 PyObject *res; 949 unsigned char *outp; 950 Py_ssize_t ressize; 951 int replsize; 952 Py_UCS4 c; 953 char buffer[256]; /* NAME_MAXLEN */ 954 if (PyUnicodeEncodeError_GetStart(exc, &start)) 955 return NULL; 956 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 957 return NULL; 958 if (!(object = PyUnicodeEncodeError_GetObject(exc))) 959 return NULL; 960 if (!ucnhash_initialized) { 961 /* load the unicode data module */ 962 ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import( 963 PyUnicodeData_CAPSULE_NAME, 1); 964 ucnhash_initialized = 1; 965 } 966 for (i = start, ressize = 0; i < end; ++i) { 967 /* object is guaranteed to be "ready" */ 968 c = PyUnicode_READ_CHAR(object, i); 969 if (ucnhash_CAPI && 970 ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) { 971 replsize = 1+1+1+strlen(buffer)+1; 972 } 973 else if (c >= 0x10000) { 974 replsize = 1+1+8; 975 } 976 else if (c >= 0x100) { 977 replsize = 1+1+4; 978 } 979 else 980 replsize = 1+1+2; 981 if (ressize > PY_SSIZE_T_MAX - replsize) 982 break; 983 ressize += replsize; 984 } 985 end = i; 986 res = PyUnicode_New(ressize, 127); 987 if (res==NULL) 988 return NULL; 989 for (i = start, outp = PyUnicode_1BYTE_DATA(res); 990 i < end; ++i) { 991 c = PyUnicode_READ_CHAR(object, i); 992 *outp++ = '\\'; 993 if (ucnhash_CAPI && 994 ucnhash_CAPI->getname(NULL, c, buffer, sizeof(buffer), 1)) { 995 *outp++ = 'N'; 996 *outp++ = '{'; 997 strcpy((char *)outp, buffer); 998 outp += strlen(buffer); 999 *outp++ = '}'; 1000 continue; 1001 } 1002 if (c >= 0x00010000) { 1003 *outp++ = 'U'; 1004 *outp++ = Py_hexdigits[(c>>28)&0xf]; 1005 *outp++ = Py_hexdigits[(c>>24)&0xf]; 1006 *outp++ = Py_hexdigits[(c>>20)&0xf]; 1007 *outp++ = Py_hexdigits[(c>>16)&0xf]; 1008 *outp++ = Py_hexdigits[(c>>12)&0xf]; 1009 *outp++ = Py_hexdigits[(c>>8)&0xf]; 1010 } 1011 else if (c >= 0x100) { 1012 *outp++ = 'u'; 1013 *outp++ = Py_hexdigits[(c>>12)&0xf]; 1014 *outp++ = Py_hexdigits[(c>>8)&0xf]; 1015 } 1016 else 1017 *outp++ = 'x'; 1018 *outp++ = Py_hexdigits[(c>>4)&0xf]; 1019 *outp++ = Py_hexdigits[c&0xf]; 1020 } 1021 1022 assert(out == start + ressize); 1023 assert(_PyUnicode_CheckConsistency(res, 1)); 1024 restuple = Py_BuildValue("(Nn)", res, end); 1025 Py_DECREF(object); 1026 return restuple; 1027 } 1028 else { 1029 wrong_exception_type(exc); 1030 return NULL; 1031 } 1032} 1033 1034#define ENC_UNKNOWN -1 1035#define ENC_UTF8 0 1036#define ENC_UTF16BE 1 1037#define ENC_UTF16LE 2 1038#define ENC_UTF32BE 3 1039#define ENC_UTF32LE 4 1040 1041static int 1042get_standard_encoding(const char *encoding, int *bytelength) 1043{ 1044 if (Py_TOLOWER(encoding[0]) == 'u' && 1045 Py_TOLOWER(encoding[1]) == 't' && 1046 Py_TOLOWER(encoding[2]) == 'f') { 1047 encoding += 3; 1048 if (*encoding == '-' || *encoding == '_' ) 1049 encoding++; 1050 if (encoding[0] == '8' && encoding[1] == '\0') { 1051 *bytelength = 3; 1052 return ENC_UTF8; 1053 } 1054 else if (encoding[0] == '1' && encoding[1] == '6') { 1055 encoding += 2; 1056 *bytelength = 2; 1057 if (*encoding == '\0') { 1058#ifdef WORDS_BIGENDIAN 1059 return ENC_UTF16BE; 1060#else 1061 return ENC_UTF16LE; 1062#endif 1063 } 1064 if (*encoding == '-' || *encoding == '_' ) 1065 encoding++; 1066 if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') { 1067 if (Py_TOLOWER(encoding[0]) == 'b') 1068 return ENC_UTF16BE; 1069 if (Py_TOLOWER(encoding[0]) == 'l') 1070 return ENC_UTF16LE; 1071 } 1072 } 1073 else if (encoding[0] == '3' && encoding[1] == '2') { 1074 encoding += 2; 1075 *bytelength = 4; 1076 if (*encoding == '\0') { 1077#ifdef WORDS_BIGENDIAN 1078 return ENC_UTF32BE; 1079#else 1080 return ENC_UTF32LE; 1081#endif 1082 } 1083 if (*encoding == '-' || *encoding == '_' ) 1084 encoding++; 1085 if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') { 1086 if (Py_TOLOWER(encoding[0]) == 'b') 1087 return ENC_UTF32BE; 1088 if (Py_TOLOWER(encoding[0]) == 'l') 1089 return ENC_UTF32LE; 1090 } 1091 } 1092 } 1093 else if (strcmp(encoding, "CP_UTF8") == 0) { 1094 *bytelength = 3; 1095 return ENC_UTF8; 1096 } 1097 return ENC_UNKNOWN; 1098} 1099 1100/* This handler is declared static until someone demonstrates 1101 a need to call it directly. */ 1102static PyObject * 1103PyCodec_SurrogatePassErrors(PyObject *exc) 1104{ 1105 PyObject *restuple; 1106 PyObject *object; 1107 PyObject *encode; 1108 char *encoding; 1109 int code; 1110 int bytelength; 1111 Py_ssize_t i; 1112 Py_ssize_t start; 1113 Py_ssize_t end; 1114 PyObject *res; 1115 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 1116 unsigned char *outp; 1117 if (PyUnicodeEncodeError_GetStart(exc, &start)) 1118 return NULL; 1119 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 1120 return NULL; 1121 if (!(object = PyUnicodeEncodeError_GetObject(exc))) 1122 return NULL; 1123 if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) { 1124 Py_DECREF(object); 1125 return NULL; 1126 } 1127 if (!(encoding = PyUnicode_AsUTF8(encode))) { 1128 Py_DECREF(object); 1129 Py_DECREF(encode); 1130 return NULL; 1131 } 1132 code = get_standard_encoding(encoding, &bytelength); 1133 Py_DECREF(encode); 1134 if (code == ENC_UNKNOWN) { 1135 /* Not supported, fail with original exception */ 1136 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); 1137 Py_DECREF(object); 1138 return NULL; 1139 } 1140 1141 if (end - start > PY_SSIZE_T_MAX / bytelength) 1142 end = start + PY_SSIZE_T_MAX / bytelength; 1143 res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start)); 1144 if (!res) { 1145 Py_DECREF(object); 1146 return NULL; 1147 } 1148 outp = (unsigned char*)PyBytes_AsString(res); 1149 for (i = start; i < end; i++) { 1150 /* object is guaranteed to be "ready" */ 1151 Py_UCS4 ch = PyUnicode_READ_CHAR(object, i); 1152 if (!Py_UNICODE_IS_SURROGATE(ch)) { 1153 /* Not a surrogate, fail with original exception */ 1154 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); 1155 Py_DECREF(res); 1156 Py_DECREF(object); 1157 return NULL; 1158 } 1159 switch (code) { 1160 case ENC_UTF8: 1161 *outp++ = (unsigned char)(0xe0 | (ch >> 12)); 1162 *outp++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f)); 1163 *outp++ = (unsigned char)(0x80 | (ch & 0x3f)); 1164 break; 1165 case ENC_UTF16LE: 1166 *outp++ = (unsigned char) ch; 1167 *outp++ = (unsigned char)(ch >> 8); 1168 break; 1169 case ENC_UTF16BE: 1170 *outp++ = (unsigned char)(ch >> 8); 1171 *outp++ = (unsigned char) ch; 1172 break; 1173 case ENC_UTF32LE: 1174 *outp++ = (unsigned char) ch; 1175 *outp++ = (unsigned char)(ch >> 8); 1176 *outp++ = (unsigned char)(ch >> 16); 1177 *outp++ = (unsigned char)(ch >> 24); 1178 break; 1179 case ENC_UTF32BE: 1180 *outp++ = (unsigned char)(ch >> 24); 1181 *outp++ = (unsigned char)(ch >> 16); 1182 *outp++ = (unsigned char)(ch >> 8); 1183 *outp++ = (unsigned char) ch; 1184 break; 1185 } 1186 } 1187 restuple = Py_BuildValue("(On)", res, end); 1188 Py_DECREF(res); 1189 Py_DECREF(object); 1190 return restuple; 1191 } 1192 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { 1193 unsigned char *p; 1194 Py_UCS4 ch = 0; 1195 if (PyUnicodeDecodeError_GetStart(exc, &start)) 1196 return NULL; 1197 if (PyUnicodeDecodeError_GetEnd(exc, &end)) 1198 return NULL; 1199 if (!(object = PyUnicodeDecodeError_GetObject(exc))) 1200 return NULL; 1201 if (!(p = (unsigned char*)PyBytes_AsString(object))) { 1202 Py_DECREF(object); 1203 return NULL; 1204 } 1205 if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) { 1206 Py_DECREF(object); 1207 return NULL; 1208 } 1209 if (!(encoding = PyUnicode_AsUTF8(encode))) { 1210 Py_DECREF(object); 1211 Py_DECREF(encode); 1212 return NULL; 1213 } 1214 code = get_standard_encoding(encoding, &bytelength); 1215 Py_DECREF(encode); 1216 if (code == ENC_UNKNOWN) { 1217 /* Not supported, fail with original exception */ 1218 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); 1219 Py_DECREF(object); 1220 return NULL; 1221 } 1222 1223 /* Try decoding a single surrogate character. If 1224 there are more, let the codec call us again. */ 1225 p += start; 1226 if (PyBytes_GET_SIZE(object) - start >= bytelength) { 1227 switch (code) { 1228 case ENC_UTF8: 1229 if ((p[0] & 0xf0) == 0xe0 && 1230 (p[1] & 0xc0) == 0x80 && 1231 (p[2] & 0xc0) == 0x80) { 1232 /* it's a three-byte code */ 1233 ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f); 1234 } 1235 break; 1236 case ENC_UTF16LE: 1237 ch = p[1] << 8 | p[0]; 1238 break; 1239 case ENC_UTF16BE: 1240 ch = p[0] << 8 | p[1]; 1241 break; 1242 case ENC_UTF32LE: 1243 ch = (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0]; 1244 break; 1245 case ENC_UTF32BE: 1246 ch = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; 1247 break; 1248 } 1249 } 1250 1251 Py_DECREF(object); 1252 if (!Py_UNICODE_IS_SURROGATE(ch)) { 1253 /* it's not a surrogate - fail */ 1254 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); 1255 return NULL; 1256 } 1257 res = PyUnicode_FromOrdinal(ch); 1258 if (res == NULL) 1259 return NULL; 1260 return Py_BuildValue("(Nn)", res, start + bytelength); 1261 } 1262 else { 1263 wrong_exception_type(exc); 1264 return NULL; 1265 } 1266} 1267 1268static PyObject * 1269PyCodec_SurrogateEscapeErrors(PyObject *exc) 1270{ 1271 PyObject *restuple; 1272 PyObject *object; 1273 Py_ssize_t i; 1274 Py_ssize_t start; 1275 Py_ssize_t end; 1276 PyObject *res; 1277 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 1278 char *outp; 1279 if (PyUnicodeEncodeError_GetStart(exc, &start)) 1280 return NULL; 1281 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 1282 return NULL; 1283 if (!(object = PyUnicodeEncodeError_GetObject(exc))) 1284 return NULL; 1285 res = PyBytes_FromStringAndSize(NULL, end-start); 1286 if (!res) { 1287 Py_DECREF(object); 1288 return NULL; 1289 } 1290 outp = PyBytes_AsString(res); 1291 for (i = start; i < end; i++) { 1292 /* object is guaranteed to be "ready" */ 1293 Py_UCS4 ch = PyUnicode_READ_CHAR(object, i); 1294 if (ch < 0xdc80 || ch > 0xdcff) { 1295 /* Not a UTF-8b surrogate, fail with original exception */ 1296 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); 1297 Py_DECREF(res); 1298 Py_DECREF(object); 1299 return NULL; 1300 } 1301 *outp++ = ch - 0xdc00; 1302 } 1303 restuple = Py_BuildValue("(On)", res, end); 1304 Py_DECREF(res); 1305 Py_DECREF(object); 1306 return restuple; 1307 } 1308 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { 1309 PyObject *str; 1310 unsigned char *p; 1311 Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */ 1312 int consumed = 0; 1313 if (PyUnicodeDecodeError_GetStart(exc, &start)) 1314 return NULL; 1315 if (PyUnicodeDecodeError_GetEnd(exc, &end)) 1316 return NULL; 1317 if (!(object = PyUnicodeDecodeError_GetObject(exc))) 1318 return NULL; 1319 if (!(p = (unsigned char*)PyBytes_AsString(object))) { 1320 Py_DECREF(object); 1321 return NULL; 1322 } 1323 while (consumed < 4 && consumed < end-start) { 1324 /* Refuse to escape ASCII bytes. */ 1325 if (p[start+consumed] < 128) 1326 break; 1327 ch[consumed] = 0xdc00 + p[start+consumed]; 1328 consumed++; 1329 } 1330 Py_DECREF(object); 1331 if (!consumed) { 1332 /* codec complained about ASCII byte. */ 1333 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); 1334 return NULL; 1335 } 1336 str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed); 1337 if (str == NULL) 1338 return NULL; 1339 return Py_BuildValue("(Nn)", str, start+consumed); 1340 } 1341 else { 1342 wrong_exception_type(exc); 1343 return NULL; 1344 } 1345} 1346 1347 1348static PyObject *strict_errors(PyObject *self, PyObject *exc) 1349{ 1350 return PyCodec_StrictErrors(exc); 1351} 1352 1353 1354static PyObject *ignore_errors(PyObject *self, PyObject *exc) 1355{ 1356 return PyCodec_IgnoreErrors(exc); 1357} 1358 1359 1360static PyObject *replace_errors(PyObject *self, PyObject *exc) 1361{ 1362 return PyCodec_ReplaceErrors(exc); 1363} 1364 1365 1366static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc) 1367{ 1368 return PyCodec_XMLCharRefReplaceErrors(exc); 1369} 1370 1371 1372static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc) 1373{ 1374 return PyCodec_BackslashReplaceErrors(exc); 1375} 1376 1377static PyObject *namereplace_errors(PyObject *self, PyObject *exc) 1378{ 1379 return PyCodec_NameReplaceErrors(exc); 1380} 1381 1382static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc) 1383{ 1384 return PyCodec_SurrogatePassErrors(exc); 1385} 1386 1387static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) 1388{ 1389 return PyCodec_SurrogateEscapeErrors(exc); 1390} 1391 1392static int _PyCodecRegistry_Init(void) 1393{ 1394 static struct { 1395 char *name; 1396 PyMethodDef def; 1397 } methods[] = 1398 { 1399 { 1400 "strict", 1401 { 1402 "strict_errors", 1403 strict_errors, 1404 METH_O, 1405 PyDoc_STR("Implements the 'strict' error handling, which " 1406 "raises a UnicodeError on coding errors.") 1407 } 1408 }, 1409 { 1410 "ignore", 1411 { 1412 "ignore_errors", 1413 ignore_errors, 1414 METH_O, 1415 PyDoc_STR("Implements the 'ignore' error handling, which " 1416 "ignores malformed data and continues.") 1417 } 1418 }, 1419 { 1420 "replace", 1421 { 1422 "replace_errors", 1423 replace_errors, 1424 METH_O, 1425 PyDoc_STR("Implements the 'replace' error handling, which " 1426 "replaces malformed data with a replacement marker.") 1427 } 1428 }, 1429 { 1430 "xmlcharrefreplace", 1431 { 1432 "xmlcharrefreplace_errors", 1433 xmlcharrefreplace_errors, 1434 METH_O, 1435 PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, " 1436 "which replaces an unencodable character with the " 1437 "appropriate XML character reference.") 1438 } 1439 }, 1440 { 1441 "backslashreplace", 1442 { 1443 "backslashreplace_errors", 1444 backslashreplace_errors, 1445 METH_O, 1446 PyDoc_STR("Implements the 'backslashreplace' error handling, " 1447 "which replaces an unencodable character with a " 1448 "backslashed escape sequence.") 1449 } 1450 }, 1451 { 1452 "namereplace", 1453 { 1454 "namereplace_errors", 1455 namereplace_errors, 1456 METH_O, 1457 PyDoc_STR("Implements the 'namereplace' error handling, " 1458 "which replaces an unencodable character with a " 1459 "\\N{...} escape sequence.") 1460 } 1461 }, 1462 { 1463 "surrogatepass", 1464 { 1465 "surrogatepass", 1466 surrogatepass_errors, 1467 METH_O 1468 } 1469 }, 1470 { 1471 "surrogateescape", 1472 { 1473 "surrogateescape", 1474 surrogateescape_errors, 1475 METH_O 1476 } 1477 } 1478 }; 1479 1480 PyInterpreterState *interp = PyThreadState_GET()->interp; 1481 PyObject *mod; 1482 unsigned i; 1483 1484 if (interp->codec_search_path != NULL) 1485 return 0; 1486 1487 interp->codec_search_path = PyList_New(0); 1488 interp->codec_search_cache = PyDict_New(); 1489 interp->codec_error_registry = PyDict_New(); 1490 1491 if (interp->codec_error_registry) { 1492 for (i = 0; i < Py_ARRAY_LENGTH(methods); ++i) { 1493 PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL); 1494 int res; 1495 if (!func) 1496 Py_FatalError("can't initialize codec error registry"); 1497 res = PyCodec_RegisterError(methods[i].name, func); 1498 Py_DECREF(func); 1499 if (res) 1500 Py_FatalError("can't initialize codec error registry"); 1501 } 1502 } 1503 1504 if (interp->codec_search_path == NULL || 1505 interp->codec_search_cache == NULL || 1506 interp->codec_error_registry == NULL) 1507 Py_FatalError("can't initialize codec registry"); 1508 1509 mod = PyImport_ImportModuleNoBlock("encodings"); 1510 if (mod == NULL) { 1511 return -1; 1512 } 1513 Py_DECREF(mod); 1514 interp->codecs_initialized = 1; 1515 return 0; 1516} 1517