1/* ------------------------------------------------------------------------ 2 3 Python Codec Registry and support functions 4 5Written by Marc-Andre Lemburg (mal@lemburg.com). 6 7Copyright (c) Corporation for National Research Initiatives. 8 9 ------------------------------------------------------------------------ */ 10 11#include "Python.h" 12#include <ctype.h> 13 14/* --- Codec Registry ----------------------------------------------------- */ 15 16/* Import the standard encodings package which will register the first 17 codec search function. 18 19 This is done in a lazy way so that the Unicode implementation does 20 not downgrade startup time of scripts not needing it. 21 22 ImportErrors are silently ignored by this function. Only one try is 23 made. 24 25*/ 26 27static int _PyCodecRegistry_Init(void); /* Forward */ 28 29int PyCodec_Register(PyObject *search_function) 30{ 31 PyInterpreterState *interp = PyThreadState_GET()->interp; 32 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) 33 goto onError; 34 if (search_function == NULL) { 35 PyErr_BadArgument(); 36 goto onError; 37 } 38 if (!PyCallable_Check(search_function)) { 39 PyErr_SetString(PyExc_TypeError, "argument must be callable"); 40 goto onError; 41 } 42 return PyList_Append(interp->codec_search_path, search_function); 43 44 onError: 45 return -1; 46} 47 48/* Convert a string to a normalized Python string: all characters are 49 converted to lower case, spaces are replaced with underscores. */ 50 51static 52PyObject *normalizestring(const char *string) 53{ 54 register size_t i; 55 size_t len = strlen(string); 56 char *p; 57 PyObject *v; 58 59 if (len > PY_SSIZE_T_MAX) { 60 PyErr_SetString(PyExc_OverflowError, "string is too large"); 61 return NULL; 62 } 63 64 v = PyString_FromStringAndSize(NULL, len); 65 if (v == NULL) 66 return NULL; 67 p = PyString_AS_STRING(v); 68 for (i = 0; i < len; i++) { 69 register char ch = string[i]; 70 if (ch == ' ') 71 ch = '-'; 72 else 73 ch = Py_TOLOWER(Py_CHARMASK(ch)); 74 p[i] = ch; 75 } 76 return v; 77} 78 79/* Lookup the given encoding and return a tuple providing the codec 80 facilities. 81 82 The encoding string is looked up converted to all lower-case 83 characters. This makes encodings looked up through this mechanism 84 effectively case-insensitive. 85 86 If no codec is found, a LookupError is set and NULL returned. 87 88 As side effect, this tries to load the encodings package, if not 89 yet done. This is part of the lazy load strategy for the encodings 90 package. 91 92*/ 93 94PyObject *_PyCodec_Lookup(const char *encoding) 95{ 96 PyInterpreterState *interp; 97 PyObject *result, *args = NULL, *v; 98 Py_ssize_t i, len; 99 100 if (encoding == NULL) { 101 PyErr_BadArgument(); 102 goto onError; 103 } 104 105 interp = PyThreadState_GET()->interp; 106 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) 107 goto onError; 108 109 /* Convert the encoding to a normalized Python string: all 110 characters are converted to lower case, spaces and hyphens are 111 replaced with underscores. */ 112 v = normalizestring(encoding); 113 if (v == NULL) 114 goto onError; 115 PyString_InternInPlace(&v); 116 117 /* First, try to lookup the name in the registry dictionary */ 118 result = PyDict_GetItem(interp->codec_search_cache, v); 119 if (result != NULL) { 120 Py_INCREF(result); 121 Py_DECREF(v); 122 return result; 123 } 124 125 /* Next, scan the search functions in order of registration */ 126 args = PyTuple_New(1); 127 if (args == NULL) 128 goto onError; 129 PyTuple_SET_ITEM(args,0,v); 130 131 len = PyList_Size(interp->codec_search_path); 132 if (len < 0) 133 goto onError; 134 if (len == 0) { 135 PyErr_SetString(PyExc_LookupError, 136 "no codec search functions registered: " 137 "can't find encoding"); 138 goto onError; 139 } 140 141 for (i = 0; i < len; i++) { 142 PyObject *func; 143 144 func = PyList_GetItem(interp->codec_search_path, i); 145 if (func == NULL) 146 goto onError; 147 result = PyEval_CallObject(func, args); 148 if (result == NULL) 149 goto onError; 150 if (result == Py_None) { 151 Py_DECREF(result); 152 continue; 153 } 154 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) { 155 PyErr_SetString(PyExc_TypeError, 156 "codec search functions must return 4-tuples"); 157 Py_DECREF(result); 158 goto onError; 159 } 160 break; 161 } 162 if (i == len) { 163 /* XXX Perhaps we should cache misses too ? */ 164 PyErr_Format(PyExc_LookupError, 165 "unknown encoding: %s", encoding); 166 goto onError; 167 } 168 169 /* Cache and return the result */ 170 PyDict_SetItem(interp->codec_search_cache, v, result); 171 Py_DECREF(args); 172 return result; 173 174 onError: 175 Py_XDECREF(args); 176 return NULL; 177} 178 179static 180PyObject *args_tuple(PyObject *object, 181 const char *errors) 182{ 183 PyObject *args; 184 185 args = PyTuple_New(1 + (errors != NULL)); 186 if (args == NULL) 187 return NULL; 188 Py_INCREF(object); 189 PyTuple_SET_ITEM(args,0,object); 190 if (errors) { 191 PyObject *v; 192 193 v = PyString_FromString(errors); 194 if (v == NULL) { 195 Py_DECREF(args); 196 return NULL; 197 } 198 PyTuple_SET_ITEM(args, 1, v); 199 } 200 return args; 201} 202 203/* Helper function to get a codec item */ 204 205static 206PyObject *codec_getitem(const char *encoding, int index) 207{ 208 PyObject *codecs; 209 PyObject *v; 210 211 codecs = _PyCodec_Lookup(encoding); 212 if (codecs == NULL) 213 return NULL; 214 v = PyTuple_GET_ITEM(codecs, index); 215 Py_DECREF(codecs); 216 Py_INCREF(v); 217 return v; 218} 219 220/* Helper function to create an incremental codec. */ 221 222static 223PyObject *codec_getincrementalcodec(const char *encoding, 224 const char *errors, 225 const char *attrname) 226{ 227 PyObject *codecs, *ret, *inccodec; 228 229 codecs = _PyCodec_Lookup(encoding); 230 if (codecs == NULL) 231 return NULL; 232 inccodec = PyObject_GetAttrString(codecs, attrname); 233 Py_DECREF(codecs); 234 if (inccodec == NULL) 235 return NULL; 236 if (errors) 237 ret = PyObject_CallFunction(inccodec, "s", errors); 238 else 239 ret = PyObject_CallFunction(inccodec, NULL); 240 Py_DECREF(inccodec); 241 return ret; 242} 243 244/* Helper function to create a stream codec. */ 245 246static 247PyObject *codec_getstreamcodec(const char *encoding, 248 PyObject *stream, 249 const char *errors, 250 const int index) 251{ 252 PyObject *codecs, *streamcodec, *codeccls; 253 254 codecs = _PyCodec_Lookup(encoding); 255 if (codecs == NULL) 256 return NULL; 257 258 codeccls = PyTuple_GET_ITEM(codecs, index); 259 if (errors != NULL) 260 streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors); 261 else 262 streamcodec = PyObject_CallFunction(codeccls, "O", stream); 263 Py_DECREF(codecs); 264 return streamcodec; 265} 266 267/* Convenience APIs to query the Codec registry. 268 269 All APIs return a codec object with incremented refcount. 270 271 */ 272 273PyObject *PyCodec_Encoder(const char *encoding) 274{ 275 return codec_getitem(encoding, 0); 276} 277 278PyObject *PyCodec_Decoder(const char *encoding) 279{ 280 return codec_getitem(encoding, 1); 281} 282 283PyObject *PyCodec_IncrementalEncoder(const char *encoding, 284 const char *errors) 285{ 286 return codec_getincrementalcodec(encoding, errors, "incrementalencoder"); 287} 288 289PyObject *PyCodec_IncrementalDecoder(const char *encoding, 290 const char *errors) 291{ 292 return codec_getincrementalcodec(encoding, errors, "incrementaldecoder"); 293} 294 295PyObject *PyCodec_StreamReader(const char *encoding, 296 PyObject *stream, 297 const char *errors) 298{ 299 return codec_getstreamcodec(encoding, stream, errors, 2); 300} 301 302PyObject *PyCodec_StreamWriter(const char *encoding, 303 PyObject *stream, 304 const char *errors) 305{ 306 return codec_getstreamcodec(encoding, stream, errors, 3); 307} 308 309/* Encode an object (e.g. an Unicode object) using the given encoding 310 and return the resulting encoded object (usually a Python string). 311 312 errors is passed to the encoder factory as argument if non-NULL. */ 313 314PyObject *PyCodec_Encode(PyObject *object, 315 const char *encoding, 316 const char *errors) 317{ 318 PyObject *encoder = NULL; 319 PyObject *args = NULL, *result = NULL; 320 PyObject *v; 321 322 encoder = PyCodec_Encoder(encoding); 323 if (encoder == NULL) 324 goto onError; 325 326 args = args_tuple(object, errors); 327 if (args == NULL) 328 goto onError; 329 330 result = PyEval_CallObject(encoder,args); 331 if (result == NULL) 332 goto onError; 333 334 if (!PyTuple_Check(result) || 335 PyTuple_GET_SIZE(result) != 2) { 336 PyErr_SetString(PyExc_TypeError, 337 "encoder must return a tuple (object,integer)"); 338 goto onError; 339 } 340 v = PyTuple_GET_ITEM(result,0); 341 Py_INCREF(v); 342 /* We don't check or use the second (integer) entry. */ 343 344 Py_DECREF(args); 345 Py_DECREF(encoder); 346 Py_DECREF(result); 347 return v; 348 349 onError: 350 Py_XDECREF(result); 351 Py_XDECREF(args); 352 Py_XDECREF(encoder); 353 return NULL; 354} 355 356/* Decode an object (usually a Python string) using the given encoding 357 and return an equivalent object (e.g. an Unicode object). 358 359 errors is passed to the decoder factory as argument if non-NULL. */ 360 361PyObject *PyCodec_Decode(PyObject *object, 362 const char *encoding, 363 const char *errors) 364{ 365 PyObject *decoder = NULL; 366 PyObject *args = NULL, *result = NULL; 367 PyObject *v; 368 369 decoder = PyCodec_Decoder(encoding); 370 if (decoder == NULL) 371 goto onError; 372 373 args = args_tuple(object, errors); 374 if (args == NULL) 375 goto onError; 376 377 result = PyEval_CallObject(decoder,args); 378 if (result == NULL) 379 goto onError; 380 if (!PyTuple_Check(result) || 381 PyTuple_GET_SIZE(result) != 2) { 382 PyErr_SetString(PyExc_TypeError, 383 "decoder must return a tuple (object,integer)"); 384 goto onError; 385 } 386 v = PyTuple_GET_ITEM(result,0); 387 Py_INCREF(v); 388 /* We don't check or use the second (integer) entry. */ 389 390 Py_DECREF(args); 391 Py_DECREF(decoder); 392 Py_DECREF(result); 393 return v; 394 395 onError: 396 Py_XDECREF(args); 397 Py_XDECREF(decoder); 398 Py_XDECREF(result); 399 return NULL; 400} 401 402/* Register the error handling callback function error under the name 403 name. This function will be called by the codec when it encounters 404 an unencodable characters/undecodable bytes and doesn't know the 405 callback name, when name is specified as the error parameter 406 in the call to the encode/decode function. 407 Return 0 on success, -1 on error */ 408int PyCodec_RegisterError(const char *name, PyObject *error) 409{ 410 PyInterpreterState *interp = PyThreadState_GET()->interp; 411 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) 412 return -1; 413 if (!PyCallable_Check(error)) { 414 PyErr_SetString(PyExc_TypeError, "handler must be callable"); 415 return -1; 416 } 417 return PyDict_SetItemString(interp->codec_error_registry, 418 (char *)name, error); 419} 420 421/* Lookup the error handling callback function registered under the 422 name error. As a special case NULL can be passed, in which case 423 the error handling callback for strict encoding will be returned. */ 424PyObject *PyCodec_LookupError(const char *name) 425{ 426 PyObject *handler = NULL; 427 428 PyInterpreterState *interp = PyThreadState_GET()->interp; 429 if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) 430 return NULL; 431 432 if (name==NULL) 433 name = "strict"; 434 handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name); 435 if (!handler) 436 PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name); 437 else 438 Py_INCREF(handler); 439 return handler; 440} 441 442static void wrong_exception_type(PyObject *exc) 443{ 444 PyObject *type = PyObject_GetAttrString(exc, "__class__"); 445 if (type != NULL) { 446 PyObject *name = PyObject_GetAttrString(type, "__name__"); 447 Py_DECREF(type); 448 if (name != NULL) { 449 PyObject *string = PyObject_Str(name); 450 Py_DECREF(name); 451 if (string != NULL) { 452 PyErr_Format(PyExc_TypeError, 453 "don't know how to handle %.400s in error callback", 454 PyString_AS_STRING(string)); 455 Py_DECREF(string); 456 } 457 } 458 } 459} 460 461PyObject *PyCodec_StrictErrors(PyObject *exc) 462{ 463 if (PyExceptionInstance_Check(exc)) 464 PyErr_SetObject(PyExceptionInstance_Class(exc), exc); 465 else 466 PyErr_SetString(PyExc_TypeError, "codec must pass exception instance"); 467 return NULL; 468} 469 470 471#ifdef Py_USING_UNICODE 472PyObject *PyCodec_IgnoreErrors(PyObject *exc) 473{ 474 Py_ssize_t end; 475 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 476 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 477 return NULL; 478 } 479 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { 480 if (PyUnicodeDecodeError_GetEnd(exc, &end)) 481 return NULL; 482 } 483 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { 484 if (PyUnicodeTranslateError_GetEnd(exc, &end)) 485 return NULL; 486 } 487 else { 488 wrong_exception_type(exc); 489 return NULL; 490 } 491 /* ouch: passing NULL, 0, pos gives None instead of u'' */ 492 return Py_BuildValue("(u#n)", &end, 0, end); 493} 494 495 496PyObject *PyCodec_ReplaceErrors(PyObject *exc) 497{ 498 PyObject *restuple; 499 Py_ssize_t start; 500 Py_ssize_t end; 501 Py_ssize_t i; 502 503 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 504 PyObject *res; 505 Py_UNICODE *p; 506 if (PyUnicodeEncodeError_GetStart(exc, &start)) 507 return NULL; 508 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 509 return NULL; 510 res = PyUnicode_FromUnicode(NULL, end-start); 511 if (res == NULL) 512 return NULL; 513 for (p = PyUnicode_AS_UNICODE(res), i = start; 514 i<end; ++p, ++i) 515 *p = '?'; 516 restuple = Py_BuildValue("(On)", res, end); 517 Py_DECREF(res); 518 return restuple; 519 } 520 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { 521 Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER; 522 if (PyUnicodeDecodeError_GetEnd(exc, &end)) 523 return NULL; 524 return Py_BuildValue("(u#n)", &res, (Py_ssize_t)1, end); 525 } 526 else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { 527 PyObject *res; 528 Py_UNICODE *p; 529 if (PyUnicodeTranslateError_GetStart(exc, &start)) 530 return NULL; 531 if (PyUnicodeTranslateError_GetEnd(exc, &end)) 532 return NULL; 533 res = PyUnicode_FromUnicode(NULL, end-start); 534 if (res == NULL) 535 return NULL; 536 for (p = PyUnicode_AS_UNICODE(res), i = start; 537 i<end; ++p, ++i) 538 *p = Py_UNICODE_REPLACEMENT_CHARACTER; 539 restuple = Py_BuildValue("(On)", res, end); 540 Py_DECREF(res); 541 return restuple; 542 } 543 else { 544 wrong_exception_type(exc); 545 return NULL; 546 } 547} 548 549PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc) 550{ 551 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 552 PyObject *restuple; 553 PyObject *object; 554 Py_ssize_t start; 555 Py_ssize_t end; 556 PyObject *res; 557 Py_UNICODE *p; 558 Py_UNICODE *startp; 559 Py_UNICODE *e; 560 Py_UNICODE *outp; 561 Py_ssize_t ressize; 562 if (PyUnicodeEncodeError_GetStart(exc, &start)) 563 return NULL; 564 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 565 return NULL; 566 if (!(object = PyUnicodeEncodeError_GetObject(exc))) 567 return NULL; 568 startp = PyUnicode_AS_UNICODE(object); 569 if (end - start > PY_SSIZE_T_MAX / (2+7+1)) { 570 end = start + PY_SSIZE_T_MAX / (2+7+1); 571#ifndef Py_UNICODE_WIDE 572 if (0xD800 <= startp[end - 1] && startp[end - 1] <= 0xDBFF) 573 end--; 574#endif 575 } 576 e = startp + end; 577 for (p = startp+start, ressize = 0; p < e;) { 578 Py_UCS4 ch = *p++; 579#ifndef Py_UNICODE_WIDE 580 if ((0xD800 <= ch && ch <= 0xDBFF) && 581 (p < e) && 582 (0xDC00 <= *p && *p <= 0xDFFF)) { 583 ch = ((((ch & 0x03FF) << 10) | 584 ((Py_UCS4)*p++ & 0x03FF)) + 0x10000); 585 } 586#endif 587 if (ch < 10) 588 ressize += 2+1+1; 589 else if (ch < 100) 590 ressize += 2+2+1; 591 else if (ch < 1000) 592 ressize += 2+3+1; 593 else if (ch < 10000) 594 ressize += 2+4+1; 595 else if (ch < 100000) 596 ressize += 2+5+1; 597 else if (ch < 1000000) 598 ressize += 2+6+1; 599 else 600 ressize += 2+7+1; 601 } 602 /* allocate replacement */ 603 res = PyUnicode_FromUnicode(NULL, ressize); 604 if (res == NULL) { 605 Py_DECREF(object); 606 return NULL; 607 } 608 /* generate replacement */ 609 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < e;) { 610 int digits; 611 int base; 612 Py_UCS4 ch = *p++; 613#ifndef Py_UNICODE_WIDE 614 if ((0xD800 <= ch && ch <= 0xDBFF) && 615 (p < startp+end) && 616 (0xDC00 <= *p && *p <= 0xDFFF)) { 617 ch = ((((ch & 0x03FF) << 10) | 618 ((Py_UCS4)*p++ & 0x03FF)) + 0x10000); 619 } 620#endif 621 *outp++ = '&'; 622 *outp++ = '#'; 623 if (ch < 10) { 624 digits = 1; 625 base = 1; 626 } 627 else if (ch < 100) { 628 digits = 2; 629 base = 10; 630 } 631 else if (ch < 1000) { 632 digits = 3; 633 base = 100; 634 } 635 else if (ch < 10000) { 636 digits = 4; 637 base = 1000; 638 } 639 else if (ch < 100000) { 640 digits = 5; 641 base = 10000; 642 } 643 else if (ch < 1000000) { 644 digits = 6; 645 base = 100000; 646 } 647 else { 648 digits = 7; 649 base = 1000000; 650 } 651 while (digits-->0) { 652 *outp++ = '0' + ch/base; 653 ch %= base; 654 base /= 10; 655 } 656 *outp++ = ';'; 657 } 658 restuple = Py_BuildValue("(On)", res, end); 659 Py_DECREF(res); 660 Py_DECREF(object); 661 return restuple; 662 } 663 else { 664 wrong_exception_type(exc); 665 return NULL; 666 } 667} 668 669static Py_UNICODE hexdigits[] = { 670 '0', '1', '2', '3', '4', '5', '6', '7', 671 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' 672}; 673 674PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) 675{ 676 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { 677 PyObject *restuple; 678 PyObject *object; 679 Py_ssize_t start; 680 Py_ssize_t end; 681 PyObject *res; 682 Py_UNICODE *p; 683 Py_UNICODE *startp; 684 Py_UNICODE *outp; 685 Py_ssize_t ressize; 686 if (PyUnicodeEncodeError_GetStart(exc, &start)) 687 return NULL; 688 if (PyUnicodeEncodeError_GetEnd(exc, &end)) 689 return NULL; 690 if (!(object = PyUnicodeEncodeError_GetObject(exc))) 691 return NULL; 692 if (end - start > PY_SSIZE_T_MAX / (1+1+8)) 693 end = start + PY_SSIZE_T_MAX / (1+1+8); 694 startp = PyUnicode_AS_UNICODE(object); 695 for (p = startp+start, ressize = 0; p < startp+end; ++p) { 696#ifdef Py_UNICODE_WIDE 697 if (*p >= 0x00010000) 698 ressize += 1+1+8; 699 else 700#endif 701 if (*p >= 0x100) { 702 ressize += 1+1+4; 703 } 704 else 705 ressize += 1+1+2; 706 } 707 res = PyUnicode_FromUnicode(NULL, ressize); 708 if (res == NULL) { 709 Py_DECREF(object); 710 return NULL; 711 } 712 for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); 713 p < startp+end; ++p) { 714 Py_UNICODE c = *p; 715 *outp++ = '\\'; 716#ifdef Py_UNICODE_WIDE 717 if (c >= 0x00010000) { 718 *outp++ = 'U'; 719 *outp++ = hexdigits[(c>>28)&0xf]; 720 *outp++ = hexdigits[(c>>24)&0xf]; 721 *outp++ = hexdigits[(c>>20)&0xf]; 722 *outp++ = hexdigits[(c>>16)&0xf]; 723 *outp++ = hexdigits[(c>>12)&0xf]; 724 *outp++ = hexdigits[(c>>8)&0xf]; 725 } 726 else 727#endif 728 if (c >= 0x100) { 729 *outp++ = 'u'; 730 *outp++ = hexdigits[(c>>12)&0xf]; 731 *outp++ = hexdigits[(c>>8)&0xf]; 732 } 733 else 734 *outp++ = 'x'; 735 *outp++ = hexdigits[(c>>4)&0xf]; 736 *outp++ = hexdigits[c&0xf]; 737 } 738 739 restuple = Py_BuildValue("(On)", res, end); 740 Py_DECREF(res); 741 Py_DECREF(object); 742 return restuple; 743 } 744 else { 745 wrong_exception_type(exc); 746 return NULL; 747 } 748} 749#endif 750 751static PyObject *strict_errors(PyObject *self, PyObject *exc) 752{ 753 return PyCodec_StrictErrors(exc); 754} 755 756 757#ifdef Py_USING_UNICODE 758static PyObject *ignore_errors(PyObject *self, PyObject *exc) 759{ 760 return PyCodec_IgnoreErrors(exc); 761} 762 763 764static PyObject *replace_errors(PyObject *self, PyObject *exc) 765{ 766 return PyCodec_ReplaceErrors(exc); 767} 768 769 770static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc) 771{ 772 return PyCodec_XMLCharRefReplaceErrors(exc); 773} 774 775 776static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc) 777{ 778 return PyCodec_BackslashReplaceErrors(exc); 779} 780#endif 781 782static int _PyCodecRegistry_Init(void) 783{ 784 static struct { 785 char *name; 786 PyMethodDef def; 787 } methods[] = 788 { 789 { 790 "strict", 791 { 792 "strict_errors", 793 strict_errors, 794 METH_O, 795 PyDoc_STR("Implements the 'strict' error handling, which " 796 "raises a UnicodeError on coding errors.") 797 } 798 }, 799#ifdef Py_USING_UNICODE 800 { 801 "ignore", 802 { 803 "ignore_errors", 804 ignore_errors, 805 METH_O, 806 PyDoc_STR("Implements the 'ignore' error handling, which " 807 "ignores malformed data and continues.") 808 } 809 }, 810 { 811 "replace", 812 { 813 "replace_errors", 814 replace_errors, 815 METH_O, 816 PyDoc_STR("Implements the 'replace' error handling, which " 817 "replaces malformed data with a replacement marker.") 818 } 819 }, 820 { 821 "xmlcharrefreplace", 822 { 823 "xmlcharrefreplace_errors", 824 xmlcharrefreplace_errors, 825 METH_O, 826 PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, " 827 "which replaces an unencodable character with the " 828 "appropriate XML character reference.") 829 } 830 }, 831 { 832 "backslashreplace", 833 { 834 "backslashreplace_errors", 835 backslashreplace_errors, 836 METH_O, 837 PyDoc_STR("Implements the 'backslashreplace' error handling, " 838 "which replaces an unencodable character with a " 839 "backslashed escape sequence.") 840 } 841 } 842#endif 843 }; 844 845 PyInterpreterState *interp = PyThreadState_GET()->interp; 846 PyObject *mod; 847 unsigned i; 848 849 if (interp->codec_search_path != NULL) 850 return 0; 851 852 interp->codec_search_path = PyList_New(0); 853 interp->codec_search_cache = PyDict_New(); 854 interp->codec_error_registry = PyDict_New(); 855 856 if (interp->codec_error_registry) { 857 for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) { 858 PyObject *func = PyCFunction_New(&methods[i].def, NULL); 859 int res; 860 if (!func) 861 Py_FatalError("can't initialize codec error registry"); 862 res = PyCodec_RegisterError(methods[i].name, func); 863 Py_DECREF(func); 864 if (res) 865 Py_FatalError("can't initialize codec error registry"); 866 } 867 } 868 869 if (interp->codec_search_path == NULL || 870 interp->codec_search_cache == NULL || 871 interp->codec_error_registry == NULL) 872 Py_FatalError("can't initialize codec registry"); 873 874 mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0); 875 if (mod == NULL) { 876 if (PyErr_ExceptionMatches(PyExc_ImportError)) { 877 /* Ignore ImportErrors... this is done so that 878 distributions can disable the encodings package. Note 879 that other errors are not masked, e.g. SystemErrors 880 raised to inform the user of an error in the Python 881 configuration are still reported back to the user. */ 882 PyErr_Clear(); 883 return 0; 884 } 885 return -1; 886 } 887 Py_DECREF(mod); 888 return 0; 889} 890