/* unicodeobject.h revision a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd */
1#ifndef Py_UNICODEOBJECT_H 2#define Py_UNICODEOBJECT_H 3 4#include <stdarg.h> 5 6/* 7 8Unicode implementation based on original code by Fredrik Lundh, 9modified by Marc-Andre Lemburg (mal@lemburg.com) according to the 10Unicode Integration Proposal. (See 11http://www.egenix.com/files/python/unicode-proposal.txt). 12 13Copyright (c) Corporation for National Research Initiatives. 14 15 16 Original header: 17 -------------------------------------------------------------------- 18 19 * Yet another Unicode string type for Python. This type supports the 20 * 16-bit Basic Multilingual Plane (BMP) only. 21 * 22 * Written by Fredrik Lundh, January 1999. 23 * 24 * Copyright (c) 1999 by Secret Labs AB. 25 * Copyright (c) 1999 by Fredrik Lundh. 26 * 27 * fredrik@pythonware.com 28 * http://www.pythonware.com 29 * 30 * -------------------------------------------------------------------- 31 * This Unicode String Type is 32 * 33 * Copyright (c) 1999 by Secret Labs AB 34 * Copyright (c) 1999 by Fredrik Lundh 35 * 36 * By obtaining, using, and/or copying this software and/or its 37 * associated documentation, you agree that you have read, understood, 38 * and will comply with the following terms and conditions: 39 * 40 * Permission to use, copy, modify, and distribute this software and its 41 * associated documentation for any purpose and without fee is hereby 42 * granted, provided that the above copyright notice appears in all 43 * copies, and that both that copyright notice and this permission notice 44 * appear in supporting documentation, and that the name of Secret Labs 45 * AB or the author not be used in advertising or publicity pertaining to 46 * distribution of the software without specific, written prior 47 * permission. 48 * 49 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO 50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 51 * FITNESS. 
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR 52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 56 * -------------------------------------------------------------------- */ 57 58#include <ctype.h> 59 60/* === Internal API ======================================================= */ 61 62/* --- Internal Unicode Format -------------------------------------------- */ 63 64/* Python 3.x requires unicode */ 65#define Py_USING_UNICODE 66 67#ifndef SIZEOF_WCHAR_T 68#error Must define SIZEOF_WCHAR_T 69#endif 70 71#define Py_UNICODE_SIZE SIZEOF_WCHAR_T 72 73/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. 74 Otherwise, Unicode strings are stored as UCS-2 (with limited support 75 for UTF-16) */ 76 77#if Py_UNICODE_SIZE >= 4 78#define Py_UNICODE_WIDE 79#endif 80 81/* Set these flags if the platform has "wchar.h" and the 82 wchar_t type is a 16-bit unsigned type */ 83/* #define HAVE_WCHAR_H */ 84/* #define HAVE_USABLE_WCHAR_T */ 85 86/* Py_UNICODE was the native Unicode storage format (code unit) used by 87 Python and represents a single Unicode element in the Unicode type. 88 With PEP 393, Py_UNICODE is deprected and replaced with a 89 typedef to wchar_t. */ 90 91#ifndef Py_LIMITED_API 92#define PY_UNICODE_TYPE wchar_t 93typedef wchar_t Py_UNICODE; 94#endif 95 96/* If the compiler provides a wchar_t type we try to support it 97 through the interface functions PyUnicode_FromWideChar(), 98 PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). 
*/ 99 100#ifdef HAVE_USABLE_WCHAR_T 101# ifndef HAVE_WCHAR_H 102# define HAVE_WCHAR_H 103# endif 104#endif 105 106#if defined(MS_WINDOWS) 107# define HAVE_MBCS 108#endif 109 110#ifdef HAVE_WCHAR_H 111/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ 112# ifdef _HAVE_BSDI 113# include <time.h> 114# endif 115# include <wchar.h> 116#endif 117 118/* Py_UCS4 and Py_UCS2 are typdefs for the respecitve 119 unicode representations. */ 120#if SIZEOF_INT >= 4 121typedef unsigned int Py_UCS4; 122#elif SIZEOF_LONG >= 4 123typedef unsigned long Py_UCS4; 124#else 125#error "Could not find a proper typedef for Py_UCS4" 126#endif 127 128typedef unsigned short Py_UCS2; 129typedef unsigned char Py_UCS1; 130 131/* --- Internal Unicode Operations ---------------------------------------- */ 132 133/* Since splitting on whitespace is an important use case, and 134 whitespace in most situations is solely ASCII whitespace, we 135 optimize for the common case by using a quick look-up table 136 _Py_ascii_whitespace (see below) with an inlined check. 137 138 */ 139#ifndef Py_LIMITED_API 140#define Py_UNICODE_ISSPACE(ch) \ 141 ((ch) < 128U ? 
_Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) 142 143#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) 144#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) 145#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) 146#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) 147 148#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) 149#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) 150#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) 151 152#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) 153#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) 154#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) 155#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) 156 157#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) 158#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) 159#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) 160 161#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) 162 163#define Py_UNICODE_ISALNUM(ch) \ 164 (Py_UNICODE_ISALPHA(ch) || \ 165 Py_UNICODE_ISDECIMAL(ch) || \ 166 Py_UNICODE_ISDIGIT(ch) || \ 167 Py_UNICODE_ISNUMERIC(ch)) 168 169#define Py_UNICODE_COPY(target, source, length) \ 170 Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE)) 171 172#define Py_UNICODE_FILL(target, value, length) \ 173 do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ 174 for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ 175 } while (0) 176 177/* macros to work with surrogates */ 178#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= ch && ch <= 0xDFFF) 179#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= ch && ch <= 0xDBFF) 180#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= ch && ch <= 0xDFFF) 181/* Join two surrogate characters and return a single Py_UCS4 value. 
*/ 182#define Py_UNICODE_JOIN_SURROGATES(high, low) \ 183 (((((Py_UCS4)(high) & 0x03FF) << 10) | \ 184 ((Py_UCS4)(low) & 0x03FF)) + 0x10000) 185 186/* Check if substring matches at given offset. The offset must be 187 valid, and the substring must not be empty. */ 188 189#define Py_UNICODE_MATCH(string, offset, substring) \ 190 ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ 191 ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ 192 !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) 193 194#endif /* Py_LIMITED_API */ 195 196#ifdef __cplusplus 197extern "C" { 198#endif 199 200/* --- Unicode Type ------------------------------------------------------- */ 201 202#ifndef Py_LIMITED_API 203 204/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject 205 structure. state.ascii and state.compact are set, and the data 206 immediately follow the structure. utf8_length and wstr_length can be found 207 in the length field; the utf8 pointer is equal to the data pointer. */ 208typedef struct { 209 PyObject_HEAD 210 Py_ssize_t length; /* Number of code points in the string */ 211 Py_hash_t hash; /* Hash value; -1 if not set */ 212 struct { 213 /* 214 SSTATE_NOT_INTERNED (0) 215 SSTATE_INTERNED_MORTAL (1) 216 SSTATE_INTERNED_IMMORTAL (2) 217 218 If interned != SSTATE_NOT_INTERNED, the two references from the 219 dictionary to this object are *not* counted in ob_refcnt. 220 */ 221 unsigned int interned:2; 222 /* Character size: 223 224 PyUnicode_WCHAR_KIND (0): wchar_t* 225 PyUnicode_1BYTE_KIND (1): Py_UCS1* 226 PyUnicode_2BYTE_KIND (2): Py_UCS2* 227 PyUnicode_4BYTE_KIND (3): Py_UCS4* 228 */ 229 unsigned int kind:2; 230 /* Compact is with respect to the allocation scheme. 
Compact unicode 231 objects only require one memory block while non-compact objects use 232 one block for the PyUnicodeObject struct and another for its data 233 buffer. */ 234 unsigned int compact:1; 235 /* Compact objects which are ASCII-only also have the state.compact 236 flag set, and use the PyASCIIObject struct. */ 237 unsigned int ascii:1; 238 /* The ready flag indicates whether the object layout is initialized 239 completely. This means that this is either a compact object, or 240 the data pointer is filled out. The bit is redundant, and helps 241 to minimize the test in PyUnicode_IS_READY(). */ 242 unsigned int ready:1; 243 } state; 244 wchar_t *wstr; /* wchar_t representation (null-terminated) */ 245} PyASCIIObject; 246 247/* Non-ASCII strings allocated through PyUnicode_New use the 248 PyCompactUnicodeOject structure. state.compact is set, and the data 249 immediately follow the structure. */ 250typedef struct { 251 PyASCIIObject _base; 252 Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the 253 * terminating \0. */ 254 char *utf8; /* UTF-8 representation (null-terminated) */ 255 Py_ssize_t wstr_length; /* Number of code points in wstr, possible 256 * surrogates count as two code points. */ 257} PyCompactUnicodeObject; 258 259/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the 260 PyUnicodeObject structure. The actual string data is initially in the wstr 261 block, and copied into the data block using PyUnicode_Ready. 
*/ 262typedef struct { 263 PyCompactUnicodeObject _base; 264 union { 265 void *any; 266 Py_UCS1 *latin1; 267 Py_UCS2 *ucs2; 268 Py_UCS4 *ucs4; 269 } data; /* Canonical, smallest-form Unicode buffer */ 270} PyUnicodeObject; 271#endif 272 273PyAPI_DATA(PyTypeObject) PyUnicode_Type; 274PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; 275 276#define PyUnicode_Check(op) \ 277 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) 278#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) 279 280/* Fast access macros */ 281#ifndef Py_LIMITED_API 282 283#define PyUnicode_WSTR_LENGTH(op) \ 284 (((PyASCIIObject*)op)->state.ascii ? \ 285 ((PyASCIIObject*)op)->length : \ 286 ((PyCompactUnicodeObject*)op)->wstr_length) 287 288/* Returns the deprecated Py_UNICODE representation's size in code units 289 (this includes surrogate pairs as 2 units). 290 If the Py_UNICODE representation is not available, it will be computed 291 on request. Use PyUnicode_GET_LENGTH() for the length in code points. */ 292 293#define PyUnicode_GET_SIZE(op) \ 294 (assert(PyUnicode_Check(op)), \ 295 (((PyASCIIObject *)(op))->wstr) ? \ 296 PyUnicode_WSTR_LENGTH(op) : \ 297 ((void)PyUnicode_AsUnicode((PyObject *)(op)), \ 298 PyUnicode_WSTR_LENGTH(op))) 299 300#define PyUnicode_GET_DATA_SIZE(op) \ 301 (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE) 302 303/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE 304 representation on demand. Using this macro is very inefficient now, 305 try to port your code to use the new PyUnicode_*BYTE_DATA() macros or 306 use PyUnicode_WRITE() and PyUnicode_READ(). */ 307 308#define PyUnicode_AS_UNICODE(op) \ 309 (assert(PyUnicode_Check(op)), \ 310 (((PyASCIIObject *)(op))->wstr) ? 
(((PyASCIIObject *)(op))->wstr) : \ 311 PyUnicode_AsUnicode((PyObject *)(op))) 312 313#define PyUnicode_AS_DATA(op) \ 314 ((const char *)(PyUnicode_AS_UNICODE(op))) 315 316 317/* --- Flexible String Representaion Helper Macros (PEP 393) -------------- */ 318 319/* Values for PyUnicodeObject.state: */ 320 321/* Interning state. */ 322#define SSTATE_NOT_INTERNED 0 323#define SSTATE_INTERNED_MORTAL 1 324#define SSTATE_INTERNED_IMMORTAL 2 325 326#define PyUnicode_IS_COMPACT_ASCII(op) (((PyASCIIObject*)op)->state.ascii) 327 328/* String contains only wstr byte characters. This is only possible 329 when the string was created with a legacy API and PyUnicode_Ready() 330 has not been called yet. */ 331#define PyUnicode_WCHAR_KIND 0 332 333/* Return values of the PyUnicode_KIND() macro: */ 334 335#define PyUnicode_1BYTE_KIND 1 336#define PyUnicode_2BYTE_KIND 2 337#define PyUnicode_4BYTE_KIND 3 338 339 340/* Return the number of bytes the string uses to represent single characters, 341 this can be 1, 2 or 4. */ 342#define PyUnicode_CHARACTER_SIZE(op) \ 343 (1 << (PyUnicode_KIND(op) - 1)) 344 345/* Return pointers to the canonical representation casted as unsigned char, 346 Py_UCS2, or Py_UCS4 for direct character access. 347 No checks are performed, use PyUnicode_CHARACTER_SIZE or 348 PyUnicode_KIND() before to ensure these will work correctly. */ 349 350#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op)) 351#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op)) 352#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op)) 353 354/* Return true if the string is compact or 0 if not. 355 No type checks or Ready calls are performed. */ 356#define PyUnicode_IS_COMPACT(op) \ 357 (((PyASCIIObject*)(op))->state.compact) 358 359/* Return one of the PyUnicode_*_KIND values defined above. 
*/ 360#define PyUnicode_KIND(op) \ 361 (assert(PyUnicode_Check(op)), \ 362 assert(PyUnicode_IS_READY(op)), \ 363 ((PyASCIIObject *)(op))->state.kind) 364 365/* Return a void pointer to the raw unicode buffer. */ 366#define _PyUnicode_COMPACT_DATA(op) \ 367 (PyUnicode_IS_COMPACT_ASCII(op) ? \ 368 ((void*)((PyASCIIObject*)(op) + 1)) : \ 369 ((void*)((PyCompactUnicodeObject*)(op) + 1))) 370 371#define _PyUnicode_NONCOMPACT_DATA(op) \ 372 (assert(((PyUnicodeObject*)(op))->data.any), \ 373 ((((PyUnicodeObject *)(op))->data.any))) 374 375#define PyUnicode_DATA(op) \ 376 (assert(PyUnicode_Check(op)), \ 377 PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \ 378 _PyUnicode_NONCOMPACT_DATA(op)) 379 380#define _PyUnicode_UTF8(op) \ 381 (PyUnicode_IS_COMPACT_ASCII(op) ? \ 382 ((char*)((PyASCIIObject*)(op) + 1)) : \ 383 ((PyCompactUnicodeObject*)(op))->utf8) 384 385#define _PyUnicode_UTF8_LENGTH(op) \ 386 (PyUnicode_IS_COMPACT_ASCII(op) ? \ 387 ((PyASCIIObject*)(op))->length : \ 388 ((PyCompactUnicodeObject*)(op))->utf8_length) 389 390/* Compute (index * char_size) where char_size is 2 ** (kind - 1). 391 392 The index is a character index, the result is a size in bytes. */ 393#define PyUnicode_KIND_SIZE(kind, index) ((index) << ((kind) - 1)) 394 395/* In the access macros below, "kind" may be evaluated more than once. 396 All other macro parameters are evaluated exactly once, so it is safe 397 to put side effects into them (such as increasing the index). */ 398 399/* Write into the canonical representation, this macro does not do any sanity 400 checks and is intended for usage in loops. The caller should cache the 401 kind and data pointers optained form other macro calls. 402 index is the index in the string (starts at 0) and value is the new 403 code point value which shoule be written to that location. 
*/ 404#define PyUnicode_WRITE(kind, data, index, value) \ 405 do { \ 406 switch ((kind)) { \ 407 case PyUnicode_1BYTE_KIND: { \ 408 ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \ 409 break; \ 410 } \ 411 case PyUnicode_2BYTE_KIND: { \ 412 ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \ 413 break; \ 414 } \ 415 default: { \ 416 assert((kind) == PyUnicode_4BYTE_KIND); \ 417 ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \ 418 } \ 419 } \ 420 } while (0) 421 422/* Read a code point form the string's canonical representation. No checks 423 or ready calls are performed. */ 424#define PyUnicode_READ(kind, data, index) \ 425 ((Py_UCS4) \ 426 ((kind) == PyUnicode_1BYTE_KIND ? \ 427 ((const unsigned char *)(data))[(index)] : \ 428 ((kind) == PyUnicode_2BYTE_KIND ? \ 429 ((const Py_UCS2 *)(data))[(index)] : \ 430 ((const Py_UCS4 *)(data))[(index)] \ 431 ) \ 432 )) 433 434/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it 435 calls PyUnicode_KIND() and might call it twice. For single reads, use 436 PyUnicode_READ_CHAR, for multiple consecutive reads callers should 437 cache kind and use PyUnicode_READ instead. */ 438#define PyUnicode_READ_CHAR(unicode, index) \ 439 ((Py_UCS4) \ 440 (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \ 441 ((const unsigned char *)(PyUnicode_DATA((unicode))))[(index)] : \ 442 (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \ 443 ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \ 444 ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \ 445 ) \ 446 )) 447 448/* Returns the length of the unicode string. The caller has to make sure that 449 the string has it's canonical representation set before calling 450 this macro. Call PyUnicode_(FAST_)Ready to ensure that. */ 451#define PyUnicode_GET_LENGTH(op) \ 452 (assert(PyUnicode_Check(op)), \ 453 assert(PyUnicode_IS_READY(op)), \ 454 ((PyASCIIObject *)(op))->length) 455 456 457/* Fast check to determine whether an object is ready. 
Equivalent to 458 PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */ 459 460#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready) 461 462/* PyUnicode_READY() does less work than PyUnicode_Ready() in the best 463 case. If the canonical representation is not yet set, it will still call 464 PyUnicode_Ready(). 465 Returns 0 on success and -1 on errors. */ 466#define PyUnicode_READY(op) \ 467 (assert(PyUnicode_Check(op)), \ 468 (PyUnicode_IS_READY(op) ? \ 469 0 : _PyUnicode_Ready((PyUnicodeObject *)(op)))) 470 471/* Return a maximum character value which is suitable for creating another 472 string based on op. This is always an approximation but more efficient 473 than interating over the string. */ 474#define PyUnicode_MAX_CHAR_VALUE(op) \ 475 (assert(PyUnicode_IS_READY(op)), \ 476 (PyUnicode_IS_COMPACT_ASCII(op) ? 0x7f: \ 477 (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \ 478 (PyUnicode_DATA(op) == (((PyCompactUnicodeObject *)(op))->utf8) ? \ 479 (0x7fU) : (0xffU) \ 480 ) : \ 481 (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \ 482 (0xffffU) : (0x10ffffU) \ 483 )))) 484 485#endif 486 487/* --- Constants ---------------------------------------------------------- */ 488 489/* This Unicode character will be used as replacement character during 490 decoding if the errors argument is set to "replace". Note: the 491 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in 492 Unicode 3.0. */ 493 494#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) 495 496/* === Public API ========================================================= */ 497 498/* --- Plain Py_UNICODE --------------------------------------------------- */ 499 500/* With PEP 393, this is the recommended way to allocate a new unicode object. 501 This function will allocate the object and its buffer in a single memory 502 block. Objects created using this function are not resizable. 
*/ 503#ifndef Py_LIMITED_API 504PyAPI_FUNC(PyObject*) PyUnicode_New( 505 Py_ssize_t size, /* Number of code points in the new string */ 506 Py_UCS4 maxchar /* maximum code point value in the string */ 507 ); 508#endif 509 510/* Initializes the canonical string representation from a the deprected 511 wstr/Py_UNICODE representation. This function is used to convert 512 unicode objects which were created using the old API to the new flexible 513 format introduced with PEP 393. The PyUnicode_READY() macro can be 514 more efficient if the string is already ready. */ 515#ifndef Py_LIMITED_API 516PyAPI_FUNC(int) _PyUnicode_Ready( 517 PyUnicodeObject *unicode /* Unicode object */ 518 ); 519#endif 520 521/* Copy character from one unicode object into another, this function performs 522 character conversion when necessary and falls back to memcpy if possible. 523 524 Fail if to is too small (smaller than how_many or smaller than 525 len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > 526 kind(to), or if to has more than 1 reference. 527 528 Return the number of written character, or return -1 and raise an exception 529 on error. 530 531 Pseudo-code: 532 533 how_many = min(how_many, len(from) - from_start) 534 to[to_start:to_start+how_many] = from[from_start:from_start+how_many] 535 return how_many 536 537 Note: The function doesn't write a terminating null character. 538 */ 539#ifndef Py_LIMITED_API 540PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( 541 PyObject *to, 542 Py_ssize_t to_start, 543 PyObject *from, 544 Py_ssize_t from_start, 545 Py_ssize_t how_many 546 ); 547#endif 548 549/* Create a Unicode Object from the Py_UNICODE buffer u of the given 550 size. 551 552 u may be NULL which causes the contents to be undefined. It is the 553 user's responsibility to fill in the needed data afterwards. Note 554 that modifying the Unicode object contents after construction is 555 only allowed if u was set to NULL. 
556 557 The buffer is copied into the new object. */ 558 559#ifndef Py_LIMITED_API 560PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( 561 const Py_UNICODE *u, /* Unicode buffer */ 562 Py_ssize_t size /* size of buffer */ 563 ); 564#endif 565 566/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ 567PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( 568 const char *u, /* UTF-8 encoded string */ 569 Py_ssize_t size /* size of buffer */ 570 ); 571 572/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated 573 UTF-8 encoded bytes. The size is determined with strlen(). */ 574PyAPI_FUNC(PyObject*) PyUnicode_FromString( 575 const char *u /* UTF-8 encoded string */ 576 ); 577 578#ifndef Py_LIMITED_API 579PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( 580 int kind, 581 const void *buffer, 582 Py_ssize_t size); 583#endif 584 585PyAPI_FUNC(PyObject*) PyUnicode_Substring( 586 PyObject *str, 587 Py_ssize_t start, 588 Py_ssize_t end); 589 590/* Copy the string into a UCS4 buffer including the null character is copy_null 591 is set. Return NULL and raise an exception on error. Raise a ValueError if 592 the buffer is smaller than the string. Return buffer on success. 593 594 buflen is the length of the buffer in (Py_UCS4) characters. */ 595PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( 596 PyObject *unicode, 597 Py_UCS4* buffer, 598 Py_ssize_t buflen, 599 int copy_null); 600 601/* Copy the string into a UCS4 buffer. A new buffer is allocated using 602 * PyMem_Malloc; if this fails, NULL is returned with a memory error 603 exception set. */ 604PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); 605 606/* Return a read-only pointer to the Unicode object's internal 607 Py_UNICODE buffer. 608 If the wchar_t/Py_UNICODE representation is not yet available, this 609 function will calculate it. 
*/ 610 611#ifndef Py_LIMITED_API 612PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( 613 PyObject *unicode /* Unicode object */ 614 ); 615#endif 616 617/* Return a read-only pointer to the Unicode object's internal 618 Py_UNICODE buffer and save the length at size. 619 If the wchar_t/Py_UNICODE representation is not yet available, this 620 function will calculate it. */ 621 622#ifndef Py_LIMITED_API 623PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( 624 PyObject *unicode, /* Unicode object */ 625 Py_ssize_t *size /* location where to save the length */ 626 ); 627#endif 628 629/* Get the length of the Unicode object. */ 630 631PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( 632 PyObject *unicode 633); 634 635/* Get the number of Py_UNICODE units in the 636 string representation. */ 637 638PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( 639 PyObject *unicode /* Unicode object */ 640 ); 641 642/* Read a character from the string. */ 643 644PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( 645 PyObject *unicode, 646 Py_ssize_t index 647 ); 648 649/* Write a character to the string. The string must have been created through 650 PyUnicode_New, must not be shared, and must not have been hashed yet. */ 651 652PyAPI_FUNC(int) PyUnicode_WriteChar( 653 PyObject *unicode, 654 Py_ssize_t index, 655 Py_UCS4 character 656 ); 657 658#ifndef Py_LIMITED_API 659/* Get the maximum ordinal for a Unicode character. */ 660PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); 661#endif 662 663/* Resize an already allocated Unicode object to the new size length. 664 665 *unicode is modified to point to the new (resized) object and 0 666 returned on success. 667 668 This API may only be called by the function which also called the 669 Unicode constructor. The refcount on the object must be 1. Otherwise, 670 an error is returned. 671 672 Error handling is implemented as follows: an exception is set, -1 673 is returned and *unicode left untouched. 
674 675*/ 676 677PyAPI_FUNC(int) PyUnicode_Resize( 678 PyObject **unicode, /* Pointer to the Unicode object */ 679 Py_ssize_t length /* New length */ 680 ); 681 682/* Coerce obj to an Unicode object and return a reference with 683 *incremented* refcount. 684 685 Coercion is done in the following way: 686 687 1. bytes, bytearray and other char buffer compatible objects are decoded 688 under the assumptions that they contain data using the UTF-8 689 encoding. Decoding is done in "strict" mode. 690 691 2. All other objects (including Unicode objects) raise an 692 exception. 693 694 The API returns NULL in case of an error. The caller is responsible 695 for decref'ing the returned objects. 696 697*/ 698 699PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( 700 register PyObject *obj, /* Object */ 701 const char *encoding, /* encoding */ 702 const char *errors /* error handling */ 703 ); 704 705/* Coerce obj to an Unicode object and return a reference with 706 *incremented* refcount. 707 708 Unicode objects are passed back as-is (subclasses are converted to 709 true Unicode objects), all other objects are delegated to 710 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 711 using UTF-8 encoding as basis for decoding the object. 712 713 The API returns NULL in case of an error. The caller is responsible 714 for decref'ing the returned objects. 715 716*/ 717 718PyAPI_FUNC(PyObject*) PyUnicode_FromObject( 719 register PyObject *obj /* Object */ 720 ); 721 722PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( 723 const char *format, /* ASCII-encoded string */ 724 va_list vargs 725 ); 726PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( 727 const char *format, /* ASCII-encoded string */ 728 ... 729 ); 730 731#ifndef Py_LIMITED_API 732/* Format the object based on the format_spec, as defined in PEP 3101 733 (Advanced String Formatting). 
*/ 734PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, 735 PyObject *format_spec, 736 Py_ssize_t start, 737 Py_ssize_t end); 738#endif 739 740PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); 741PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); 742PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( 743 const char *u /* UTF-8 encoded string */ 744 ); 745#ifndef Py_LIMITED_API 746PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void); 747#endif 748 749/* Use only if you know it's a string */ 750#define PyUnicode_CHECK_INTERNED(op) \ 751 (((PyASCIIObject *)(op))->state.interned) 752 753/* --- wchar_t support for platforms which support it --------------------- */ 754 755#ifdef HAVE_WCHAR_H 756 757/* Create a Unicode Object from the wchar_t buffer w of the given 758 size. 759 760 The buffer is copied into the new object. */ 761 762PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( 763 register const wchar_t *w, /* wchar_t buffer */ 764 Py_ssize_t size /* size of buffer */ 765 ); 766 767/* Copies the Unicode Object contents into the wchar_t buffer w. At 768 most size wchar_t characters are copied. 769 770 Note that the resulting wchar_t string may or may not be 771 0-terminated. It is the responsibility of the caller to make sure 772 that the wchar_t string is 0-terminated in case this is required by 773 the application. 774 775 Returns the number of wchar_t characters copied (excluding a 776 possibly trailing 0-termination character) or -1 in case of an 777 error. */ 778 779PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( 780 PyObject *unicode, /* Unicode object */ 781 register wchar_t *w, /* wchar_t buffer */ 782 Py_ssize_t size /* size of buffer */ 783 ); 784 785/* Convert the Unicode object to a wide character string. The output string 786 always ends with a nul character. If size is not NULL, write the number of 787 wide characters (excluding the null character) into *size. 
788 789 Returns a buffer allocated by PyMem_Alloc() (use PyMem_Free() to free it) 790 on success. On error, returns NULL, *size is undefined and raises a 791 MemoryError. */ 792 793PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( 794 PyObject *unicode, /* Unicode object */ 795 Py_ssize_t *size /* number of characters of the result */ 796 ); 797 798PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind); 799 800#endif 801 802/* --- Unicode ordinals --------------------------------------------------- */ 803 804/* Create a Unicode Object from the given Unicode code point ordinal. 805 806 The ordinal must be in range(0x10000) on narrow Python builds 807 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 808 raised in case it is not. 809 810*/ 811 812PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); 813 814/* --- Free-list management ----------------------------------------------- */ 815 816/* Clear the free list used by the Unicode implementation. 817 818 This can be used to release memory used for objects on the free 819 list back to the Python memory allocator. 820 821*/ 822 823PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); 824 825/* === Builtin Codecs ===================================================== 826 827 Many of these APIs take two arguments encoding and errors. These 828 parameters encoding and errors have the same semantics as the ones 829 of the builtin str() API. 830 831 Setting encoding to NULL causes the default encoding (UTF-8) to be used. 832 833 Error handling is set by errors which may also be set to NULL 834 meaning to use the default handling defined for the codec. Default 835 error handling for all builtin codecs is "strict" (ValueErrors are 836 raised). 837 838 The codecs all use a similar interface. Only deviation from the 839 generic ones are documented. 
840 841*/ 842 843/* --- Manage the default encoding ---------------------------------------- */ 844 845/* Returns a pointer to the default encoding (UTF-8) of the 846 Unicode object unicode and the size of the encoded representation 847 in bytes stored in *size. 848 849 In case of an error, no *size is set. 850 851 This funcation caches the UTF-8 encoded string in the unicodeobject 852 and subsequent calls will return the same string. The memory is relased 853 when the unicodeobject is deallocated. 854 855 _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to 856 support the previous internal function with the same behaviour. 857 858 *** This API is for interpreter INTERNAL USE ONLY and will likely 859 *** be removed or changed in the future. 860 861 *** If you need to access the Unicode object as UTF-8 bytes string, 862 *** please use PyUnicode_AsUTF8String() instead. 863*/ 864 865#ifndef Py_LIMITED_API 866PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize( 867 PyObject *unicode, 868 Py_ssize_t *size); 869#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize 870#endif 871 872/* Returns a pointer to the default encoding (UTF-8) of the 873 Unicode object unicode. 874 875 Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation 876 in the unicodeobject. 877 878 _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to 879 support the previous internal function with the same behaviour. 880 881 Use of this API is DEPRECATED since no size information can be 882 extracted from the returned data. 883 884 *** This API is for interpreter INTERNAL USE ONLY and will likely 885 *** be removed or changed for Python 3.1. 886 887 *** If you need to access the Unicode object as UTF-8 bytes string, 888 *** please use PyUnicode_AsUTF8String() instead. 889 890*/ 891 892#ifndef Py_LIMITED_API 893PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode); 894#define _PyUnicode_AsString PyUnicode_AsUTF8 895#endif 896 897/* Returns "utf-8". 
*/ 898 899PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); 900 901/* --- Generic Codecs ----------------------------------------------------- */ 902 903/* Create a Unicode object by decoding the encoded string s of the 904 given size. */ 905 906PyAPI_FUNC(PyObject*) PyUnicode_Decode( 907 const char *s, /* encoded string */ 908 Py_ssize_t size, /* size of buffer */ 909 const char *encoding, /* encoding */ 910 const char *errors /* error handling */ 911 ); 912 913/* Decode a Unicode object unicode and return the result as Python 914 object. */ 915 916PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( 917 PyObject *unicode, /* Unicode object */ 918 const char *encoding, /* encoding */ 919 const char *errors /* error handling */ 920 ); 921 922/* Decode a Unicode object unicode and return the result as Unicode 923 object. */ 924 925PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( 926 PyObject *unicode, /* Unicode object */ 927 const char *encoding, /* encoding */ 928 const char *errors /* error handling */ 929 ); 930 931/* Encodes a Py_UNICODE buffer of the given size and returns a 932 Python string object. */ 933 934#ifndef Py_LIMITED_API 935PyAPI_FUNC(PyObject*) PyUnicode_Encode( 936 const Py_UNICODE *s, /* Unicode char buffer */ 937 Py_ssize_t size, /* number of Py_UNICODE chars to encode */ 938 const char *encoding, /* encoding */ 939 const char *errors /* error handling */ 940 ); 941#endif 942 943/* Encodes a Unicode object and returns the result as Python 944 object. */ 945 946PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( 947 PyObject *unicode, /* Unicode object */ 948 const char *encoding, /* encoding */ 949 const char *errors /* error handling */ 950 ); 951 952/* Encodes a Unicode object and returns the result as Python string 953 object. 
*/ 954 955PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( 956 PyObject *unicode, /* Unicode object */ 957 const char *encoding, /* encoding */ 958 const char *errors /* error handling */ 959 ); 960 961/* Encodes a Unicode object and returns the result as Unicode 962 object. */ 963 964PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( 965 PyObject *unicode, /* Unicode object */ 966 const char *encoding, /* encoding */ 967 const char *errors /* error handling */ 968 ); 969 970/* Build an encoding map. */ 971 972PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( 973 PyObject* string /* 256 character map */ 974 ); 975 976/* --- UTF-7 Codecs ------------------------------------------------------- */ 977 978PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( 979 const char *string, /* UTF-7 encoded string */ 980 Py_ssize_t length, /* size of string */ 981 const char *errors /* error handling */ 982 ); 983 984PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( 985 const char *string, /* UTF-7 encoded string */ 986 Py_ssize_t length, /* size of string */ 987 const char *errors, /* error handling */ 988 Py_ssize_t *consumed /* bytes consumed */ 989 ); 990 991#ifndef Py_LIMITED_API 992PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( 993 const Py_UNICODE *data, /* Unicode char buffer */ 994 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 995 int base64SetO, /* Encode RFC2152 Set O characters in base64 */ 996 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ 997 const char *errors /* error handling */ 998 ); 999#endif 1000 1001/* --- UTF-8 Codecs ------------------------------------------------------- */ 1002 1003PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( 1004 const char *string, /* UTF-8 encoded string */ 1005 Py_ssize_t length, /* size of string */ 1006 const char *errors /* error handling */ 1007 ); 1008 1009PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( 1010 const char *string, /* UTF-8 encoded string */ 1011 Py_ssize_t length, /* size of 
string */ 1012 const char *errors, /* error handling */ 1013 Py_ssize_t *consumed /* bytes consumed */ 1014 ); 1015 1016PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( 1017 PyObject *unicode /* Unicode object */ 1018 ); 1019 1020#ifndef Py_LIMITED_API 1021PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String( 1022 PyObject *unicode, 1023 const char *errors); 1024 1025PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( 1026 const Py_UNICODE *data, /* Unicode char buffer */ 1027 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1028 const char *errors /* error handling */ 1029 ); 1030#endif 1031 1032/* --- UTF-32 Codecs ------------------------------------------------------ */ 1033 1034/* Decodes length bytes from a UTF-32 encoded buffer string and returns 1035 the corresponding Unicode object. 1036 1037 errors (if non-NULL) defines the error handling. It defaults 1038 to "strict". 1039 1040 If byteorder is non-NULL, the decoder starts decoding using the 1041 given byte order: 1042 1043 *byteorder == -1: little endian 1044 *byteorder == 0: native order 1045 *byteorder == 1: big endian 1046 1047 In native mode, the first four bytes of the stream are checked for a 1048 BOM mark. If found, the BOM mark is analysed, the byte order 1049 adjusted and the BOM skipped. In the other modes, no BOM mark 1050 interpretation is done. After completion, *byteorder is set to the 1051 current byte order at the end of input data. 1052 1053 If byteorder is NULL, the codec starts in native order mode. 
1054 1055*/ 1056 1057PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( 1058 const char *string, /* UTF-32 encoded string */ 1059 Py_ssize_t length, /* size of string */ 1060 const char *errors, /* error handling */ 1061 int *byteorder /* pointer to byteorder to use 1062 0=native;-1=LE,1=BE; updated on 1063 exit */ 1064 ); 1065 1066PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( 1067 const char *string, /* UTF-32 encoded string */ 1068 Py_ssize_t length, /* size of string */ 1069 const char *errors, /* error handling */ 1070 int *byteorder, /* pointer to byteorder to use 1071 0=native;-1=LE,1=BE; updated on 1072 exit */ 1073 Py_ssize_t *consumed /* bytes consumed */ 1074 ); 1075 1076/* Returns a Python string using the UTF-32 encoding in native byte 1077 order. The string always starts with a BOM mark. */ 1078 1079PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( 1080 PyObject *unicode /* Unicode object */ 1081 ); 1082 1083/* Returns a Python string object holding the UTF-32 encoded value of 1084 the Unicode data. 1085 1086 If byteorder is not 0, output is written according to the following 1087 byte order: 1088 1089 byteorder == -1: little endian 1090 byteorder == 0: native byte order (writes a BOM mark) 1091 byteorder == 1: big endian 1092 1093 If byteorder is 0, the output string will always start with the 1094 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 1095 prepended. 1096 1097*/ 1098 1099#ifndef Py_LIMITED_API 1100PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( 1101 const Py_UNICODE *data, /* Unicode char buffer */ 1102 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1103 const char *errors, /* error handling */ 1104 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1105 ); 1106#endif 1107 1108/* --- UTF-16 Codecs ------------------------------------------------------ */ 1109 1110/* Decodes length bytes from a UTF-16 encoded buffer string and returns 1111 the corresponding Unicode object. 
1112 1113 errors (if non-NULL) defines the error handling. It defaults 1114 to "strict". 1115 1116 If byteorder is non-NULL, the decoder starts decoding using the 1117 given byte order: 1118 1119 *byteorder == -1: little endian 1120 *byteorder == 0: native order 1121 *byteorder == 1: big endian 1122 1123 In native mode, the first two bytes of the stream are checked for a 1124 BOM mark. If found, the BOM mark is analysed, the byte order 1125 adjusted and the BOM skipped. In the other modes, no BOM mark 1126 interpretation is done. After completion, *byteorder is set to the 1127 current byte order at the end of input data. 1128 1129 If byteorder is NULL, the codec starts in native order mode. 1130 1131*/ 1132 1133PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( 1134 const char *string, /* UTF-16 encoded string */ 1135 Py_ssize_t length, /* size of string */ 1136 const char *errors, /* error handling */ 1137 int *byteorder /* pointer to byteorder to use 1138 0=native;-1=LE,1=BE; updated on 1139 exit */ 1140 ); 1141 1142PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( 1143 const char *string, /* UTF-16 encoded string */ 1144 Py_ssize_t length, /* size of string */ 1145 const char *errors, /* error handling */ 1146 int *byteorder, /* pointer to byteorder to use 1147 0=native;-1=LE,1=BE; updated on 1148 exit */ 1149 Py_ssize_t *consumed /* bytes consumed */ 1150 ); 1151 1152/* Returns a Python string using the UTF-16 encoding in native byte 1153 order. The string always starts with a BOM mark. */ 1154 1155PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( 1156 PyObject *unicode /* Unicode object */ 1157 ); 1158 1159/* Returns a Python string object holding the UTF-16 encoded value of 1160 the Unicode data. 
1161 1162 If byteorder is not 0, output is written according to the following 1163 byte order: 1164 1165 byteorder == -1: little endian 1166 byteorder == 0: native byte order (writes a BOM mark) 1167 byteorder == 1: big endian 1168 1169 If byteorder is 0, the output string will always start with the 1170 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 1171 prepended. 1172 1173 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 1174 UCS-2. This trick makes it possible to add full UTF-16 capabilities 1175 at a later point without compromising the APIs. 1176 1177*/ 1178 1179#ifndef Py_LIMITED_API 1180PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( 1181 const Py_UNICODE *data, /* Unicode char buffer */ 1182 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1183 const char *errors, /* error handling */ 1184 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1185 ); 1186#endif 1187 1188/* --- Unicode-Escape Codecs ---------------------------------------------- */ 1189 1190PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( 1191 const char *string, /* Unicode-Escape encoded string */ 1192 Py_ssize_t length, /* size of string */ 1193 const char *errors /* error handling */ 1194 ); 1195 1196PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( 1197 PyObject *unicode /* Unicode object */ 1198 ); 1199 1200#ifndef Py_LIMITED_API 1201PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( 1202 const Py_UNICODE *data, /* Unicode char buffer */ 1203 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ 1204 ); 1205#endif 1206 1207/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ 1208 1209PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( 1210 const char *string, /* Raw-Unicode-Escape encoded string */ 1211 Py_ssize_t length, /* size of string */ 1212 const char *errors /* error handling */ 1213 ); 1214 1215PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( 1216 PyObject *unicode 
/* Unicode object */ 1217 ); 1218 1219#ifndef Py_LIMITED_API 1220PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( 1221 const Py_UNICODE *data, /* Unicode char buffer */ 1222 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ 1223 ); 1224#endif 1225 1226/* --- Unicode Internal Codec --------------------------------------------- 1227 1228 Only for internal use in _codecsmodule.c */ 1229 1230#ifndef Py_LIMITED_API 1231PyObject *_PyUnicode_DecodeUnicodeInternal( 1232 const char *string, 1233 Py_ssize_t length, 1234 const char *errors 1235 ); 1236#endif 1237 1238/* --- Latin-1 Codecs ----------------------------------------------------- 1239 1240 Note: Latin-1 corresponds to the first 256 Unicode ordinals. 1241 1242*/ 1243 1244PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( 1245 const char *string, /* Latin-1 encoded string */ 1246 Py_ssize_t length, /* size of string */ 1247 const char *errors /* error handling */ 1248 ); 1249 1250PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( 1251 PyObject *unicode /* Unicode object */ 1252 ); 1253 1254#ifndef Py_LIMITED_API 1255PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String( 1256 PyObject* unicode, 1257 const char* errors); 1258 1259PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( 1260 const Py_UNICODE *data, /* Unicode char buffer */ 1261 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1262 const char *errors /* error handling */ 1263 ); 1264#endif 1265 1266/* --- ASCII Codecs ------------------------------------------------------- 1267 1268 Only 7-bit ASCII data is accepted. All other codes generate errors.
1269 1270*/ 1271 1272PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( 1273 const char *string, /* ASCII encoded string */ 1274 Py_ssize_t length, /* size of string */ 1275 const char *errors /* error handling */ 1276 ); 1277 1278PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( 1279 PyObject *unicode /* Unicode object */ 1280 ); 1281 1282#ifndef Py_LIMITED_API 1283PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString( 1284 PyObject* unicode, 1285 const char* errors); 1286 1287PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( 1288 const Py_UNICODE *data, /* Unicode char buffer */ 1289 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1290 const char *errors /* error handling */ 1291 ); 1292#endif 1293 1294/* --- Character Map Codecs ----------------------------------------------- 1295 1296 This codec uses mappings to encode and decode characters. 1297 1298 Decoding mappings must map single string characters to single 1299 Unicode characters, integers (which are then interpreted as Unicode 1300 ordinals) or None (meaning "undefined mapping" and causing an 1301 error). 1302 1303 Encoding mappings must map single Unicode characters to single 1304 string characters, integers (which are then interpreted as Latin-1 1305 ordinals) or None (meaning "undefined mapping" and causing an 1306 error). 1307 1308 If a character lookup fails with a LookupError, the character is 1309 copied as-is meaning that its ordinal value will be interpreted as 1310 Unicode or Latin-1 ordinal resp. Because of this mappings only need 1311 to contain those mappings which map characters to different code 1312 points. 
1313 1314*/ 1315 1316PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( 1317 const char *string, /* Encoded string */ 1318 Py_ssize_t length, /* size of string */ 1319 PyObject *mapping, /* character mapping 1320 (char ordinal -> unicode ordinal) */ 1321 const char *errors /* error handling */ 1322 ); 1323 1324PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( 1325 PyObject *unicode, /* Unicode object */ 1326 PyObject *mapping /* character mapping 1327 (unicode ordinal -> char ordinal) */ 1328 ); 1329 1330#ifndef Py_LIMITED_API 1331PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( 1332 const Py_UNICODE *data, /* Unicode char buffer */ 1333 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1334 PyObject *mapping, /* character mapping 1335 (unicode ordinal -> char ordinal) */ 1336 const char *errors /* error handling */ 1337 ); 1338#endif 1339 1340/* Translate a Py_UNICODE buffer of the given length by applying a 1341 character mapping table to it and return the resulting Unicode 1342 object. 1343 1344 The mapping table must map Unicode ordinal integers to Unicode 1345 ordinal integers or None (causing deletion of the character). 1346 1347 Mapping tables may be dictionaries or sequences. Unmapped character 1348 ordinals (ones which cause a LookupError) are left untouched and 1349 are copied as-is. 
1350 1351*/ 1352 1353#ifndef Py_LIMITED_API 1354PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( 1355 const Py_UNICODE *data, /* Unicode char buffer */ 1356 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1357 PyObject *table, /* Translate table */ 1358 const char *errors /* error handling */ 1359 ); 1360#endif 1361 1362#ifdef HAVE_MBCS 1363 1364/* --- MBCS codecs for Windows -------------------------------------------- */ 1365 1366PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( 1367 const char *string, /* MBCS encoded string */ 1368 Py_ssize_t length, /* size of string */ 1369 const char *errors /* error handling */ 1370 ); 1371 1372PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( 1373 const char *string, /* MBCS encoded string */ 1374 Py_ssize_t length, /* size of string */ 1375 const char *errors, /* error handling */ 1376 Py_ssize_t *consumed /* bytes consumed */ 1377 ); 1378 1379PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( 1380 PyObject *unicode /* Unicode object */ 1381 ); 1382 1383#ifndef Py_LIMITED_API 1384PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( 1385 const Py_UNICODE *data, /* Unicode char buffer */ 1386 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1387 const char *errors /* error handling */ 1388 ); 1389#endif 1390 1391#endif /* HAVE_MBCS */ 1392 1393/* --- Decimal Encoder ---------------------------------------------------- */ 1394 1395/* Takes a Unicode string holding a decimal value and writes it into 1396 an output buffer using standard ASCII digit codes. 1397 1398 The output buffer has to provide at least length+1 bytes of storage 1399 area. The output string is 0-terminated. 1400 1401 The encoder converts whitespace to ' ', decimal characters to their 1402 corresponding ASCII digit and all other Latin-1 characters except 1403 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 1404 are treated as errors. This includes embedded NULL bytes. 
1405 1406 Error handling is defined by the errors argument: 1407 1408 NULL or "strict": raise a ValueError 1409 "ignore": ignore the wrong characters (these are not copied to the 1410 output buffer) 1411 "replace": replaces illegal characters with '?' 1412 1413 Returns 0 on success, -1 on failure. 1414 1415*/ 1416 1417#ifndef Py_LIMITED_API 1418PyAPI_FUNC(int) PyUnicode_EncodeDecimal( 1419 Py_UNICODE *s, /* Unicode buffer */ 1420 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1421 char *output, /* Output buffer; must have size >= length+1 */ 1422 const char *errors /* error handling */ 1423 ); 1424#endif 1425 1426/* Transforms code points that have decimal digit property to the 1427 corresponding ASCII digit code points. 1428 1429 Returns a new Unicode string on success, NULL on failure. 1430*/ 1431 1432#ifndef Py_LIMITED_API 1433PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII( 1434 Py_UNICODE *s, /* Unicode buffer */ 1435 Py_ssize_t length /* Number of Py_UNICODE chars to transform */ 1436 ); 1437#endif 1438 1439/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyUnicodeObject 1440 as argument instead of a raw buffer and length. This function additionally 1441 transforms spaces to ASCII because this is what the callers in longobject, 1442 floatobject, and complexobject did anyways. */ 1443 1444#ifndef Py_LIMITED_API 1445PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( 1446 PyObject *unicode /* Unicode object */ 1447 ); 1448#endif 1449 1450/* --- File system encoding ---------------------------------------------- */ 1451 1452/* ParseTuple converter: encode str objects to bytes using 1453 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ 1454 1455PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); 1456 1457/* ParseTuple converter: decode bytes objects to unicode using 1458 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is.
*/ 1459 1460PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); 1461 1462/* Decode a null-terminated string using Py_FileSystemDefaultEncoding 1463 and the "surrogateescape" error handler. 1464 1465 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1466 encoding. 1467 1468 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. 1469*/ 1470 1471PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( 1472 const char *s /* encoded string */ 1473 ); 1474 1475/* Decode a string using Py_FileSystemDefaultEncoding 1476 and the "surrogateescape" error handler. 1477 1478 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1479 encoding. 1480*/ 1481 1482PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( 1483 const char *s, /* encoded string */ 1484 Py_ssize_t size /* size */ 1485 ); 1486 1487/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the 1488 "surrogateescape" error handler, and return bytes. 1489 1490 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1491 encoding. 1492*/ 1493 1494PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( 1495 PyObject *unicode 1496 ); 1497 1498/* --- Methods & Slots ---------------------------------------------------- 1499 1500 These are capable of handling Unicode objects and strings on input 1501 (we refer to them as strings in the descriptions) and return 1502 Unicode objects or integers as appropriate. */ 1503 1504/* Concat two strings giving a new Unicode string.
*/ 1505 1506PyAPI_FUNC(PyObject*) PyUnicode_Concat( 1507 PyObject *left, /* Left string */ 1508 PyObject *right /* Right string */ 1509 ); 1510 1511/* Concat two strings and put the result in *pleft 1512 (sets *pleft to NULL on error) */ 1513 1514PyAPI_FUNC(void) PyUnicode_Append( 1515 PyObject **pleft, /* Pointer to left string */ 1516 PyObject *right /* Right string */ 1517 ); 1518 1519/* Concat two strings, put the result in *pleft and drop the right object 1520 (sets *pleft to NULL on error) */ 1521 1522PyAPI_FUNC(void) PyUnicode_AppendAndDel( 1523 PyObject **pleft, /* Pointer to left string */ 1524 PyObject *right /* Right string */ 1525 ); 1526 1527/* Split a string giving a list of Unicode strings. 1528 1529 If sep is NULL, splitting will be done at all whitespace 1530 substrings. Otherwise, splits occur at the given separator. 1531 1532 At most maxsplit splits will be done. If negative, no limit is set. 1533 1534 Separators are not included in the resulting list. 1535 1536*/ 1537 1538PyAPI_FUNC(PyObject*) PyUnicode_Split( 1539 PyObject *s, /* String to split */ 1540 PyObject *sep, /* String separator */ 1541 Py_ssize_t maxsplit /* Maxsplit count */ 1542 ); 1543 1544/* Ditto, but split at line breaks. 1545 1546 CRLF is considered to be one line break. Line breaks are not 1547 included in the resulting list unless keepends is true. */ 1548 1549PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( 1550 PyObject *s, /* String to split */ 1551 int keepends /* If true, line end markers are included */ 1552 ); 1553 1554/* Partition a string using a given separator. */ 1555 1556PyAPI_FUNC(PyObject*) PyUnicode_Partition( 1557 PyObject *s, /* String to partition */ 1558 PyObject *sep /* String separator */ 1559 ); 1560 1561/* Partition a string using a given separator, searching from the end of the 1562 string.
*/ 1563 1564PyAPI_FUNC(PyObject*) PyUnicode_RPartition( 1565 PyObject *s, /* String to partition */ 1566 PyObject *sep /* String separator */ 1567 ); 1568 1569/* Split a string giving a list of Unicode strings. 1570 1571 If sep is NULL, splitting will be done at all whitespace 1572 substrings. Otherwise, splits occur at the given separator. 1573 1574 At most maxsplit splits will be done. But unlike PyUnicode_Split 1575 PyUnicode_RSplit splits from the end of the string. If negative, 1576 no limit is set. 1577 1578 Separators are not included in the resulting list. 1579 1580*/ 1581 1582PyAPI_FUNC(PyObject*) PyUnicode_RSplit( 1583 PyObject *s, /* String to split */ 1584 PyObject *sep, /* String separator */ 1585 Py_ssize_t maxsplit /* Maxsplit count */ 1586 ); 1587 1588/* Translate a string by applying a character mapping table to it and 1589 return the resulting Unicode object. 1590 1591 The mapping table must map Unicode ordinal integers to Unicode 1592 ordinal integers or None (causing deletion of the character). 1593 1594 Mapping tables may be dictionaries or sequences. Unmapped character 1595 ordinals (ones which cause a LookupError) are left untouched and 1596 are copied as-is. 1597 1598*/ 1599 1600PyAPI_FUNC(PyObject *) PyUnicode_Translate( 1601 PyObject *str, /* String */ 1602 PyObject *table, /* Translate table */ 1603 const char *errors /* error handling */ 1604 ); 1605 1606/* Join a sequence of strings using the given separator and return 1607 the resulting Unicode string. */ 1608 1609PyAPI_FUNC(PyObject*) PyUnicode_Join( 1610 PyObject *separator, /* Separator string */ 1611 PyObject *seq /* Sequence object */ 1612 ); 1613 1614/* Return 1 if substr matches str[start:end] at the given tail end, 0 1615 otherwise. 
*/ 1616 1617PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( 1618 PyObject *str, /* String */ 1619 PyObject *substr, /* Prefix or Suffix string */ 1620 Py_ssize_t start, /* Start index */ 1621 Py_ssize_t end, /* Stop index */ 1622 int direction /* Tail end: -1 prefix, +1 suffix */ 1623 ); 1624 1625/* Return the first position of substr in str[start:end] using the 1626 given search direction or -1 if not found. -2 is returned in case 1627 an error occurred and an exception is set. */ 1628 1629PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( 1630 PyObject *str, /* String */ 1631 PyObject *substr, /* Substring to find */ 1632 Py_ssize_t start, /* Start index */ 1633 Py_ssize_t end, /* Stop index */ 1634 int direction /* Find direction: +1 forward, -1 backward */ 1635 ); 1636 1637/* Like PyUnicode_Find, but search for single character only. */ 1638PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( 1639 PyObject *str, 1640 Py_UCS4 ch, 1641 Py_ssize_t start, 1642 Py_ssize_t end, 1643 int direction 1644 ); 1645 1646/* Count the number of occurrences of substr in str[start:end]. */ 1647 1648PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( 1649 PyObject *str, /* String */ 1650 PyObject *substr, /* Substring to count */ 1651 Py_ssize_t start, /* Start index */ 1652 Py_ssize_t end /* Stop index */ 1653 ); 1654 1655/* Replace at most maxcount occurrences of substr in str with replstr 1656 and return the resulting Unicode object. */ 1657 1658PyAPI_FUNC(PyObject *) PyUnicode_Replace( 1659 PyObject *str, /* String */ 1660 PyObject *substr, /* Substring to find */ 1661 PyObject *replstr, /* Substring to replace */ 1662 Py_ssize_t maxcount /* Max. number of replacements to apply; 1663 -1 = all */ 1664 ); 1665 1666/* Compare two strings and return -1, 0, 1 for less than, equal, 1667 greater than resp. 
*/ 1668 1669PyAPI_FUNC(int) PyUnicode_Compare( 1670 PyObject *left, /* Left string */ 1671 PyObject *right /* Right string */ 1672 ); 1673 1674PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( 1675 PyObject *left, 1676 const char *right /* ASCII-encoded string */ 1677 ); 1678 1679/* Rich compare two strings and return one of the following: 1680 1681 - NULL in case an exception was raised 1682 - Py_True or Py_False for successful comparisons 1683 - Py_NotImplemented in case the type combination is unknown 1684 1685 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 1686 case the conversion of the arguments to Unicode fails with a 1687 UnicodeDecodeError. 1688 1689 Possible values for op: 1690 1691 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 1692 1693*/ 1694 1695PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( 1696 PyObject *left, /* Left string */ 1697 PyObject *right, /* Right string */ 1698 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ 1699 ); 1700 1701/* Apply an argument tuple or dictionary to a format string and return 1702 the resulting Unicode string. */ 1703 1704PyAPI_FUNC(PyObject *) PyUnicode_Format( 1705 PyObject *format, /* Format string */ 1706 PyObject *args /* Argument tuple or dictionary */ 1707 ); 1708 1709/* Checks whether element is contained in container and returns 1/0 1710 accordingly. 1711 1712 element has to coerce to a one-element Unicode string. -1 is 1713 returned in case of an error. */ 1714 1715PyAPI_FUNC(int) PyUnicode_Contains( 1716 PyObject *container, /* Container string */ 1717 PyObject *element /* Element string */ 1718 ); 1719 1720/* Checks whether argument is a valid identifier.
*/ 1721 1722PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); 1723 1724#ifndef Py_LIMITED_API 1725/* Externally visible for str.strip(unicode) */ 1726PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( 1727 PyUnicodeObject *self, 1728 int striptype, 1729 PyObject *sepobj 1730 ); 1731#endif 1732 1733/* Using the current locale, insert the thousands grouping 1734 into the string pointed to by buffer. For the argument descriptions, 1735 see Objects/stringlib/localeutil.h */ 1736 1737#ifndef Py_LIMITED_API 1738PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer, 1739 Py_ssize_t n_buffer, 1740 Py_UNICODE *digits, 1741 Py_ssize_t n_digits, 1742 Py_ssize_t min_width); 1743#endif 1744 1745/* Using explicit passed-in values, insert the thousands grouping 1746 into the string pointed to by buffer. For the argument descriptions, 1747 see Objects/stringlib/localeutil.h */ 1748#ifndef Py_LIMITED_API 1749PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( 1750 int kind, 1751 void *buffer, 1752 Py_ssize_t n_buffer, 1753 void *digits, 1754 Py_ssize_t n_digits, 1755 Py_ssize_t min_width, 1756 const char *grouping, 1757 const char *thousands_sep); 1758#endif 1759/* === Characters Type APIs =============================================== */ 1760 1761/* Helper array used by Py_UNICODE_ISSPACE(). */ 1762 1763#ifndef Py_LIMITED_API 1764PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; 1765 1766/* These should not be used directly. Use the Py_UNICODE_IS* and 1767 Py_UNICODE_TO* macros instead. 1768 1769 These APIs are implemented in Objects/unicodectype.c. 
1770 1771*/ 1772 1773PyAPI_FUNC(int) _PyUnicode_IsLowercase( 1774 Py_UCS4 ch /* Unicode character */ 1775 ); 1776 1777PyAPI_FUNC(int) _PyUnicode_IsUppercase( 1778 Py_UCS4 ch /* Unicode character */ 1779 ); 1780 1781PyAPI_FUNC(int) _PyUnicode_IsTitlecase( 1782 Py_UCS4 ch /* Unicode character */ 1783 ); 1784 1785PyAPI_FUNC(int) _PyUnicode_IsXidStart( 1786 Py_UCS4 ch /* Unicode character */ 1787 ); 1788 1789PyAPI_FUNC(int) _PyUnicode_IsXidContinue( 1790 Py_UCS4 ch /* Unicode character */ 1791 ); 1792 1793PyAPI_FUNC(int) _PyUnicode_IsWhitespace( 1794 const Py_UCS4 ch /* Unicode character */ 1795 ); 1796 1797PyAPI_FUNC(int) _PyUnicode_IsLinebreak( 1798 const Py_UCS4 ch /* Unicode character */ 1799 ); 1800 1801PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( 1802 Py_UCS4 ch /* Unicode character */ 1803 ); 1804 1805PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( 1806 Py_UCS4 ch /* Unicode character */ 1807 ); 1808 1809PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( 1810 Py_UCS4 ch /* Unicode character */ 1811 ); 1812 1813PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( 1814 Py_UCS4 ch /* Unicode character */ 1815 ); 1816 1817PyAPI_FUNC(int) _PyUnicode_ToDigit( 1818 Py_UCS4 ch /* Unicode character */ 1819 ); 1820 1821PyAPI_FUNC(double) _PyUnicode_ToNumeric( 1822 Py_UCS4 ch /* Unicode character */ 1823 ); 1824 1825PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( 1826 Py_UCS4 ch /* Unicode character */ 1827 ); 1828 1829PyAPI_FUNC(int) _PyUnicode_IsDigit( 1830 Py_UCS4 ch /* Unicode character */ 1831 ); 1832 1833PyAPI_FUNC(int) _PyUnicode_IsNumeric( 1834 Py_UCS4 ch /* Unicode character */ 1835 ); 1836 1837PyAPI_FUNC(int) _PyUnicode_IsPrintable( 1838 Py_UCS4 ch /* Unicode character */ 1839 ); 1840 1841PyAPI_FUNC(int) _PyUnicode_IsAlpha( 1842 Py_UCS4 ch /* Unicode character */ 1843 ); 1844 1845PyAPI_FUNC(size_t) Py_UNICODE_strlen( 1846 const Py_UNICODE *u 1847 ); 1848 1849PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy( 1850 Py_UNICODE *s1, 1851 const Py_UNICODE *s2); 1852 1853PyAPI_FUNC(Py_UNICODE*) 
Py_UNICODE_strcat( 1854 Py_UNICODE *s1, const Py_UNICODE *s2); 1855 1856PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy( 1857 Py_UNICODE *s1, 1858 const Py_UNICODE *s2, 1859 size_t n); 1860 1861PyAPI_FUNC(int) Py_UNICODE_strcmp( 1862 const Py_UNICODE *s1, 1863 const Py_UNICODE *s2 1864 ); 1865 1866PyAPI_FUNC(int) Py_UNICODE_strncmp( 1867 const Py_UNICODE *s1, 1868 const Py_UNICODE *s2, 1869 size_t n 1870 ); 1871 1872PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr( 1873 const Py_UNICODE *s, 1874 Py_UNICODE c 1875 ); 1876 1877PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr( 1878 const Py_UNICODE *s, 1879 Py_UNICODE c 1880 ); 1881 1882PyAPI_FUNC(size_t) Py_UCS4_strlen( 1883 const Py_UCS4 *u 1884 ); 1885 1886PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcpy( 1887 Py_UCS4 *s1, 1888 const Py_UCS4 *s2); 1889 1890PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcat( 1891 Py_UCS4 *s1, const Py_UCS4 *s2); 1892 1893PyAPI_FUNC(Py_UCS4*) Py_UCS4_strncpy( 1894 Py_UCS4 *s1, 1895 const Py_UCS4 *s2, 1896 size_t n); 1897 1898PyAPI_FUNC(int) Py_UCS4_strcmp( 1899 const Py_UCS4 *s1, 1900 const Py_UCS4 *s2 1901 ); 1902 1903PyAPI_FUNC(int) Py_UCS4_strncmp( 1904 const Py_UCS4 *s1, 1905 const Py_UCS4 *s2, 1906 size_t n 1907 ); 1908 1909PyAPI_FUNC(Py_UCS4*) Py_UCS4_strchr( 1910 const Py_UCS4 *s, 1911 Py_UCS4 c 1912 ); 1913 1914PyAPI_FUNC(Py_UCS4*) Py_UCS4_strrchr( 1915 const Py_UCS4 *s, 1916 Py_UCS4 c 1917 ); 1918 1919/* Create a copy of a unicode string ending with a nul character. Return NULL 1920 and raise a MemoryError exception on memory allocation failure, otherwise 1921 return a newly allocated buffer (use PyMem_Free() to free the buffer). */ 1922 1923PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy( 1924 PyObject *unicode 1925 ); 1926#endif /* Py_LIMITED_API */ 1927 1928#ifdef __cplusplus 1929} 1930#endif 1931#endif /* !Py_UNICODEOBJECT_H */ 1932