unicodeobject.h revision 37943769ef7594c9fb6a0c23ff4094376b49c3ea
1#ifndef Py_UNICODEOBJECT_H 2#define Py_UNICODEOBJECT_H 3 4#include <stdarg.h> 5 6/* 7 8Unicode implementation based on original code by Fredrik Lundh, 9modified by Marc-Andre Lemburg (mal@lemburg.com) according to the 10Unicode Integration Proposal. (See 11http://www.egenix.com/files/python/unicode-proposal.txt). 12 13Copyright (c) Corporation for National Research Initiatives. 14 15 16 Original header: 17 -------------------------------------------------------------------- 18 19 * Yet another Unicode string type for Python. This type supports the 20 * 16-bit Basic Multilingual Plane (BMP) only. 21 * 22 * Written by Fredrik Lundh, January 1999. 23 * 24 * Copyright (c) 1999 by Secret Labs AB. 25 * Copyright (c) 1999 by Fredrik Lundh. 26 * 27 * fredrik@pythonware.com 28 * http://www.pythonware.com 29 * 30 * -------------------------------------------------------------------- 31 * This Unicode String Type is 32 * 33 * Copyright (c) 1999 by Secret Labs AB 34 * Copyright (c) 1999 by Fredrik Lundh 35 * 36 * By obtaining, using, and/or copying this software and/or its 37 * associated documentation, you agree that you have read, understood, 38 * and will comply with the following terms and conditions: 39 * 40 * Permission to use, copy, modify, and distribute this software and its 41 * associated documentation for any purpose and without fee is hereby 42 * granted, provided that the above copyright notice appears in all 43 * copies, and that both that copyright notice and this permission notice 44 * appear in supporting documentation, and that the name of Secret Labs 45 * AB or the author not be used in advertising or publicity pertaining to 46 * distribution of the software without specific, written prior 47 * permission. 48 * 49 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO 50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 51 * FITNESS. 
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR 52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 56 * -------------------------------------------------------------------- */ 57 58#include <ctype.h> 59 60/* === Internal API ======================================================= */ 61 62/* --- Internal Unicode Format -------------------------------------------- */ 63 64/* Python 3.x requires unicode */ 65#define Py_USING_UNICODE 66 67#ifndef SIZEOF_WCHAR_T 68#error Must define SIZEOF_WCHAR_T 69#endif 70 71#define Py_UNICODE_SIZE SIZEOF_WCHAR_T 72 73/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. 74 Otherwise, Unicode strings are stored as UCS-2 (with limited support 75 for UTF-16) */ 76 77#if Py_UNICODE_SIZE >= 4 78#define Py_UNICODE_WIDE 79#endif 80 81/* Set these flags if the platform has "wchar.h" and the 82 wchar_t type is a 16-bit unsigned type */ 83/* #define HAVE_WCHAR_H */ 84/* #define HAVE_USABLE_WCHAR_T */ 85 86/* Py_UNICODE was the native Unicode storage format (code unit) used by 87 Python and represents a single Unicode element in the Unicode type. 88 With PEP 393, Py_UNICODE is deprected and replaced with a 89 typedef to wchar_t. */ 90 91#ifndef Py_LIMITED_API 92#define PY_UNICODE_TYPE wchar_t 93typedef wchar_t Py_UNICODE; 94#endif 95 96/* If the compiler provides a wchar_t type we try to support it 97 through the interface functions PyUnicode_FromWideChar(), 98 PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). 
*/ 99 100#ifdef HAVE_USABLE_WCHAR_T 101# ifndef HAVE_WCHAR_H 102# define HAVE_WCHAR_H 103# endif 104#endif 105 106#if defined(MS_WINDOWS) 107# define HAVE_MBCS 108#endif 109 110#ifdef HAVE_WCHAR_H 111/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ 112# ifdef _HAVE_BSDI 113# include <time.h> 114# endif 115# include <wchar.h> 116#endif 117 118/* Py_UCS4 and Py_UCS2 are typdefs for the respecitve 119 unicode representations. */ 120#if SIZEOF_INT >= 4 121typedef unsigned int Py_UCS4; 122#elif SIZEOF_LONG >= 4 123typedef unsigned long Py_UCS4; 124#else 125#error "Could not find a proper typedef for Py_UCS4" 126#endif 127 128typedef unsigned short Py_UCS2; 129typedef unsigned char Py_UCS1; 130 131/* --- Internal Unicode Operations ---------------------------------------- */ 132 133/* Since splitting on whitespace is an important use case, and 134 whitespace in most situations is solely ASCII whitespace, we 135 optimize for the common case by using a quick look-up table 136 _Py_ascii_whitespace (see below) with an inlined check. 137 138 */ 139#ifndef Py_LIMITED_API 140#define Py_UNICODE_ISSPACE(ch) \ 141 ((ch) < 128U ? 
_Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) 142 143#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) 144#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) 145#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) 146#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) 147 148#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) 149#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) 150#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) 151 152#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) 153#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) 154#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) 155#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) 156 157#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) 158#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) 159#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) 160 161#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) 162 163#define Py_UNICODE_ISALNUM(ch) \ 164 (Py_UNICODE_ISALPHA(ch) || \ 165 Py_UNICODE_ISDECIMAL(ch) || \ 166 Py_UNICODE_ISDIGIT(ch) || \ 167 Py_UNICODE_ISNUMERIC(ch)) 168 169#define Py_UNICODE_COPY(target, source, length) \ 170 Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE)) 171 172#define Py_UNICODE_FILL(target, value, length) \ 173 do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ 174 for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ 175 } while (0) 176 177/* macros to work with surrogates */ 178#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= ch && ch <= 0xDFFF) 179#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= ch && ch <= 0xDBFF) 180#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= ch && ch <= 0xDFFF) 181/* Join two surrogate characters and return a single Py_UCS4 value. 
*/ 182#define Py_UNICODE_JOIN_SURROGATES(high, low) \ 183 (((((Py_UCS4)(high) & 0x03FF) << 10) | \ 184 ((Py_UCS4)(low) & 0x03FF)) + 0x10000) 185 186/* Check if substring matches at given offset. The offset must be 187 valid, and the substring must not be empty. */ 188 189#define Py_UNICODE_MATCH(string, offset, substring) \ 190 ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ 191 ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ 192 !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) 193 194#endif /* Py_LIMITED_API */ 195 196#ifdef __cplusplus 197extern "C" { 198#endif 199 200/* --- Unicode Type ------------------------------------------------------- */ 201 202#ifndef Py_LIMITED_API 203 204/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject 205 structure. state.ascii and state.compact are set, and the data 206 immediately follow the structure. utf8_length and wstr_length can be found 207 in the length field; the utf8 pointer is equal to the data pointer. */ 208typedef struct { 209 PyObject_HEAD 210 Py_ssize_t length; /* Number of code points in the string */ 211 Py_hash_t hash; /* Hash value; -1 if not set */ 212 struct { 213 /* 214 SSTATE_NOT_INTERNED (0) 215 SSTATE_INTERNED_MORTAL (1) 216 SSTATE_INTERNED_IMMORTAL (2) 217 218 If interned != SSTATE_NOT_INTERNED, the two references from the 219 dictionary to this object are *not* counted in ob_refcnt. 220 */ 221 unsigned int interned:2; 222 /* Character size: 223 224 PyUnicode_WCHAR_KIND (0): wchar_t* 225 PyUnicode_1BYTE_KIND (1): Py_UCS1* 226 PyUnicode_2BYTE_KIND (2): Py_UCS2* 227 PyUnicode_4BYTE_KIND (3): Py_UCS4* 228 */ 229 unsigned int kind:2; 230 /* Compact is with respect to the allocation scheme. 
Compact unicode 231 objects only require one memory block while non-compact objects use 232 one block for the PyUnicodeObject struct and another for its data 233 buffer. */ 234 unsigned int compact:1; 235 /* Compact objects which are ASCII-only also have the state.compact 236 flag set, and use the PyASCIIObject struct. */ 237 unsigned int ascii:1; 238 /* The ready flag indicates whether the object layout is initialized 239 completely. This means that this is either a compact object, or 240 the data pointer is filled out. The bit is redundant, and helps 241 to minimize the test in PyUnicode_IS_READY(). */ 242 unsigned int ready:1; 243 } state; 244 wchar_t *wstr; /* wchar_t representation (null-terminated) */ 245} PyASCIIObject; 246 247/* Non-ASCII strings allocated through PyUnicode_New use the 248 PyCompactUnicodeOject structure. state.compact is set, and the data 249 immediately follow the structure. */ 250typedef struct { 251 PyASCIIObject _base; 252 Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the 253 * terminating \0. */ 254 char *utf8; /* UTF-8 representation (null-terminated) */ 255 Py_ssize_t wstr_length; /* Number of code points in wstr, possible 256 * surrogates count as two code points. */ 257} PyCompactUnicodeObject; 258 259/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the 260 PyUnicodeObject structure. The actual string data is initially in the wstr 261 block, and copied into the data block using PyUnicode_Ready. 
*/ 262typedef struct { 263 PyCompactUnicodeObject _base; 264 union { 265 void *any; 266 Py_UCS1 *latin1; 267 Py_UCS2 *ucs2; 268 Py_UCS4 *ucs4; 269 } data; /* Canonical, smallest-form Unicode buffer */ 270} PyUnicodeObject; 271#endif 272 273PyAPI_DATA(PyTypeObject) PyUnicode_Type; 274PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; 275 276#define PyUnicode_Check(op) \ 277 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) 278#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) 279 280/* Fast access macros */ 281#ifndef Py_LIMITED_API 282 283#define PyUnicode_WSTR_LENGTH(op) \ 284 (((PyASCIIObject*)op)->state.ascii ? \ 285 ((PyASCIIObject*)op)->length : \ 286 ((PyCompactUnicodeObject*)op)->wstr_length) 287 288/* Returns the deprecated Py_UNICODE representation's size in code units 289 (this includes surrogate pairs as 2 units). 290 If the Py_UNICODE representation is not available, it will be computed 291 on request. Use PyUnicode_GET_LENGTH() for the length in code points. */ 292 293#define PyUnicode_GET_SIZE(op) \ 294 (assert(PyUnicode_Check(op)), \ 295 (((PyASCIIObject *)(op))->wstr) ? \ 296 PyUnicode_WSTR_LENGTH(op) : \ 297 ((void)PyUnicode_AsUnicode((PyObject *)(op)), \ 298 PyUnicode_WSTR_LENGTH(op))) 299 300#define PyUnicode_GET_DATA_SIZE(op) \ 301 (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE) 302 303/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE 304 representation on demand. Using this macro is very inefficient now, 305 try to port your code to use the new PyUnicode_*BYTE_DATA() macros or 306 use PyUnicode_WRITE() and PyUnicode_READ(). */ 307 308#define PyUnicode_AS_UNICODE(op) \ 309 (assert(PyUnicode_Check(op)), \ 310 (((PyASCIIObject *)(op))->wstr) ? 
(((PyASCIIObject *)(op))->wstr) : \ 311 PyUnicode_AsUnicode((PyObject *)(op))) 312 313#define PyUnicode_AS_DATA(op) \ 314 ((const char *)(PyUnicode_AS_UNICODE(op))) 315 316 317/* --- Flexible String Representaion Helper Macros (PEP 393) -------------- */ 318 319/* Values for PyUnicodeObject.state: */ 320 321/* Interning state. */ 322#define SSTATE_NOT_INTERNED 0 323#define SSTATE_INTERNED_MORTAL 1 324#define SSTATE_INTERNED_IMMORTAL 2 325 326#define PyUnicode_IS_COMPACT_ASCII(op) (((PyASCIIObject*)op)->state.ascii) 327 328/* String contains only wstr byte characters. This is only possible 329 when the string was created with a legacy API and PyUnicode_Ready() 330 has not been called yet. */ 331#define PyUnicode_WCHAR_KIND 0 332 333/* Return values of the PyUnicode_KIND() macro: */ 334 335#define PyUnicode_1BYTE_KIND 1 336#define PyUnicode_2BYTE_KIND 2 337#define PyUnicode_4BYTE_KIND 3 338 339 340/* Return the number of bytes the string uses to represent single characters, 341 this can be 1, 2 or 4. 342 343 See also PyUnicode_KIND_SIZE(). */ 344#define PyUnicode_CHARACTER_SIZE(op) \ 345 (1 << (PyUnicode_KIND(op) - 1)) 346 347/* Return pointers to the canonical representation casted as unsigned char, 348 Py_UCS2, or Py_UCS4 for direct character access. 349 No checks are performed, use PyUnicode_CHARACTER_SIZE or 350 PyUnicode_KIND() before to ensure these will work correctly. */ 351 352#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op)) 353#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op)) 354#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op)) 355 356/* Return true if the string is compact or 0 if not. 357 No type checks or Ready calls are performed. */ 358#define PyUnicode_IS_COMPACT(op) \ 359 (((PyASCIIObject*)(op))->state.compact) 360 361/* Return one of the PyUnicode_*_KIND values defined above. 
*/ 362#define PyUnicode_KIND(op) \ 363 (assert(PyUnicode_Check(op)), \ 364 assert(PyUnicode_IS_READY(op)), \ 365 ((PyASCIIObject *)(op))->state.kind) 366 367/* Return a void pointer to the raw unicode buffer. */ 368#define _PyUnicode_COMPACT_DATA(op) \ 369 (PyUnicode_IS_COMPACT_ASCII(op) ? \ 370 ((void*)((PyASCIIObject*)(op) + 1)) : \ 371 ((void*)((PyCompactUnicodeObject*)(op) + 1))) 372 373#define _PyUnicode_NONCOMPACT_DATA(op) \ 374 (assert(((PyUnicodeObject*)(op))->data.any), \ 375 ((((PyUnicodeObject *)(op))->data.any))) 376 377#define PyUnicode_DATA(op) \ 378 (assert(PyUnicode_Check(op)), \ 379 PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \ 380 _PyUnicode_NONCOMPACT_DATA(op)) 381 382/* Compute (index * char_size) where char_size is 2 ** (kind - 1). 383 The index is a character index, the result is a size in bytes. 384 385 See also PyUnicode_CHARACTER_SIZE(). */ 386#define PyUnicode_KIND_SIZE(kind, index) ((index) << ((kind) - 1)) 387 388/* In the access macros below, "kind" may be evaluated more than once. 389 All other macro parameters are evaluated exactly once, so it is safe 390 to put side effects into them (such as increasing the index). */ 391 392/* Write into the canonical representation, this macro does not do any sanity 393 checks and is intended for usage in loops. The caller should cache the 394 kind and data pointers optained form other macro calls. 395 index is the index in the string (starts at 0) and value is the new 396 code point value which shoule be written to that location. 
*/ 397#define PyUnicode_WRITE(kind, data, index, value) \ 398 do { \ 399 switch ((kind)) { \ 400 case PyUnicode_1BYTE_KIND: { \ 401 ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \ 402 break; \ 403 } \ 404 case PyUnicode_2BYTE_KIND: { \ 405 ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \ 406 break; \ 407 } \ 408 default: { \ 409 assert((kind) == PyUnicode_4BYTE_KIND); \ 410 ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \ 411 } \ 412 } \ 413 } while (0) 414 415/* Read a code point form the string's canonical representation. No checks 416 or ready calls are performed. */ 417#define PyUnicode_READ(kind, data, index) \ 418 ((Py_UCS4) \ 419 ((kind) == PyUnicode_1BYTE_KIND ? \ 420 ((const Py_UCS1 *)(data))[(index)] : \ 421 ((kind) == PyUnicode_2BYTE_KIND ? \ 422 ((const Py_UCS2 *)(data))[(index)] : \ 423 ((const Py_UCS4 *)(data))[(index)] \ 424 ) \ 425 )) 426 427/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it 428 calls PyUnicode_KIND() and might call it twice. For single reads, use 429 PyUnicode_READ_CHAR, for multiple consecutive reads callers should 430 cache kind and use PyUnicode_READ instead. */ 431#define PyUnicode_READ_CHAR(unicode, index) \ 432 (assert(PyUnicode_Check(unicode)), \ 433 assert(PyUnicode_IS_READY(unicode)), \ 434 (Py_UCS4) \ 435 (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \ 436 ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \ 437 (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \ 438 ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \ 439 ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \ 440 ) \ 441 )) 442 443/* Returns the length of the unicode string. The caller has to make sure that 444 the string has it's canonical representation set before calling 445 this macro. Call PyUnicode_(FAST_)Ready to ensure that. 
*/ 446#define PyUnicode_GET_LENGTH(op) \ 447 (assert(PyUnicode_Check(op)), \ 448 assert(PyUnicode_IS_READY(op)), \ 449 ((PyASCIIObject *)(op))->length) 450 451 452/* Fast check to determine whether an object is ready. Equivalent to 453 PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */ 454 455#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready) 456 457/* PyUnicode_READY() does less work than PyUnicode_Ready() in the best 458 case. If the canonical representation is not yet set, it will still call 459 PyUnicode_Ready(). 460 Returns 0 on success and -1 on errors. */ 461#define PyUnicode_READY(op) \ 462 (assert(PyUnicode_Check(op)), \ 463 (PyUnicode_IS_READY(op) ? \ 464 0 : _PyUnicode_Ready((PyObject *)(op)))) 465 466/* Return a maximum character value which is suitable for creating another 467 string based on op. This is always an approximation but more efficient 468 than interating over the string. */ 469#define PyUnicode_MAX_CHAR_VALUE(op) \ 470 (assert(PyUnicode_IS_READY(op)), \ 471 (PyUnicode_IS_COMPACT_ASCII(op) ? 0x7f: \ 472 (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \ 473 (PyUnicode_DATA(op) == (((PyCompactUnicodeObject *)(op))->utf8) ? \ 474 (0x7fU) : (0xffU) \ 475 ) : \ 476 (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \ 477 (0xffffU) : (0x10ffffU) \ 478 )))) 479 480#endif 481 482/* --- Constants ---------------------------------------------------------- */ 483 484/* This Unicode character will be used as replacement character during 485 decoding if the errors argument is set to "replace". Note: the 486 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in 487 Unicode 3.0. */ 488 489#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) 490 491/* === Public API ========================================================= */ 492 493/* --- Plain Py_UNICODE --------------------------------------------------- */ 494 495/* With PEP 393, this is the recommended way to allocate a new unicode object. 
496 This function will allocate the object and its buffer in a single memory 497 block. Objects created using this function are not resizable. */ 498#ifndef Py_LIMITED_API 499PyAPI_FUNC(PyObject*) PyUnicode_New( 500 Py_ssize_t size, /* Number of code points in the new string */ 501 Py_UCS4 maxchar /* maximum code point value in the string */ 502 ); 503#endif 504 505/* Initializes the canonical string representation from a the deprecated 506 wstr/Py_UNICODE representation. This function is used to convert Unicode 507 objects which were created using the old API to the new flexible format 508 introduced with PEP 393. 509 510 Don't call this function directly, use the public PyUnicode_READY() macro 511 instead. */ 512#ifndef Py_LIMITED_API 513PyAPI_FUNC(int) _PyUnicode_Ready( 514 PyObject *unicode /* Unicode object */ 515 ); 516#endif 517 518/* Get a copy of a Unicode string. */ 519PyAPI_FUNC(PyObject*) PyUnicode_Copy( 520 PyObject *unicode 521 ); 522 523/* Copy character from one unicode object into another, this function performs 524 character conversion when necessary and falls back to memcpy if possible. 525 526 Fail if to is too small (smaller than how_many or smaller than 527 len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > 528 kind(to), or if to has more than 1 reference. 529 530 Return the number of written character, or return -1 and raise an exception 531 on error. 532 533 Pseudo-code: 534 535 how_many = min(how_many, len(from) - from_start) 536 to[to_start:to_start+how_many] = from[from_start:from_start+how_many] 537 return how_many 538 539 Note: The function doesn't write a terminating null character. 540 */ 541#ifndef Py_LIMITED_API 542PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( 543 PyObject *to, 544 Py_ssize_t to_start, 545 PyObject *from, 546 Py_ssize_t from_start, 547 Py_ssize_t how_many 548 ); 549#endif 550 551/* Create a Unicode Object from the Py_UNICODE buffer u of the given 552 size. 
553 554 u may be NULL which causes the contents to be undefined. It is the 555 user's responsibility to fill in the needed data afterwards. Note 556 that modifying the Unicode object contents after construction is 557 only allowed if u was set to NULL. 558 559 The buffer is copied into the new object. */ 560 561#ifndef Py_LIMITED_API 562PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( 563 const Py_UNICODE *u, /* Unicode buffer */ 564 Py_ssize_t size /* size of buffer */ 565 ); 566#endif 567 568/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ 569PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( 570 const char *u, /* UTF-8 encoded string */ 571 Py_ssize_t size /* size of buffer */ 572 ); 573 574/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated 575 UTF-8 encoded bytes. The size is determined with strlen(). */ 576PyAPI_FUNC(PyObject*) PyUnicode_FromString( 577 const char *u /* UTF-8 encoded string */ 578 ); 579 580#ifndef Py_LIMITED_API 581PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( 582 int kind, 583 const void *buffer, 584 Py_ssize_t size); 585#endif 586 587PyAPI_FUNC(PyObject*) PyUnicode_Substring( 588 PyObject *str, 589 Py_ssize_t start, 590 Py_ssize_t end); 591 592/* Copy the string into a UCS4 buffer including the null character is copy_null 593 is set. Return NULL and raise an exception on error. Raise a ValueError if 594 the buffer is smaller than the string. Return buffer on success. 595 596 buflen is the length of the buffer in (Py_UCS4) characters. */ 597PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( 598 PyObject *unicode, 599 Py_UCS4* buffer, 600 Py_ssize_t buflen, 601 int copy_null); 602 603/* Copy the string into a UCS4 buffer. A new buffer is allocated using 604 * PyMem_Malloc; if this fails, NULL is returned with a memory error 605 exception set. */ 606PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); 607 608/* Return a read-only pointer to the Unicode object's internal 609 Py_UNICODE buffer. 
610 If the wchar_t/Py_UNICODE representation is not yet available, this 611 function will calculate it. */ 612 613#ifndef Py_LIMITED_API 614PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( 615 PyObject *unicode /* Unicode object */ 616 ); 617#endif 618 619/* Return a read-only pointer to the Unicode object's internal 620 Py_UNICODE buffer and save the length at size. 621 If the wchar_t/Py_UNICODE representation is not yet available, this 622 function will calculate it. */ 623 624#ifndef Py_LIMITED_API 625PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( 626 PyObject *unicode, /* Unicode object */ 627 Py_ssize_t *size /* location where to save the length */ 628 ); 629#endif 630 631/* Get the length of the Unicode object. */ 632 633PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( 634 PyObject *unicode 635); 636 637/* Get the number of Py_UNICODE units in the 638 string representation. */ 639 640PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( 641 PyObject *unicode /* Unicode object */ 642 ); 643 644/* Read a character from the string. */ 645 646PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( 647 PyObject *unicode, 648 Py_ssize_t index 649 ); 650 651/* Write a character to the string. The string must have been created through 652 PyUnicode_New, must not be shared, and must not have been hashed yet. 653 654 Return 0 on success, -1 on error. */ 655 656PyAPI_FUNC(int) PyUnicode_WriteChar( 657 PyObject *unicode, 658 Py_ssize_t index, 659 Py_UCS4 character 660 ); 661 662#ifndef Py_LIMITED_API 663/* Get the maximum ordinal for a Unicode character. */ 664PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); 665#endif 666 667/* Resize an already allocated Unicode object to the new size length. 668 669 *unicode is modified to point to the new (resized) object and 0 670 returned on success. 671 672 This API may only be called by the function which also called the 673 Unicode constructor. The refcount on the object must be 1. Otherwise, 674 an error is returned. 
675 676 Error handling is implemented as follows: an exception is set, -1 677 is returned and *unicode left untouched. 678 679*/ 680 681PyAPI_FUNC(int) PyUnicode_Resize( 682 PyObject **unicode, /* Pointer to the Unicode object */ 683 Py_ssize_t length /* New length */ 684 ); 685 686/* Coerce obj to an Unicode object and return a reference with 687 *incremented* refcount. 688 689 Coercion is done in the following way: 690 691 1. bytes, bytearray and other char buffer compatible objects are decoded 692 under the assumptions that they contain data using the UTF-8 693 encoding. Decoding is done in "strict" mode. 694 695 2. All other objects (including Unicode objects) raise an 696 exception. 697 698 The API returns NULL in case of an error. The caller is responsible 699 for decref'ing the returned objects. 700 701*/ 702 703PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( 704 register PyObject *obj, /* Object */ 705 const char *encoding, /* encoding */ 706 const char *errors /* error handling */ 707 ); 708 709/* Coerce obj to an Unicode object and return a reference with 710 *incremented* refcount. 711 712 Unicode objects are passed back as-is (subclasses are converted to 713 true Unicode objects), all other objects are delegated to 714 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in 715 using UTF-8 encoding as basis for decoding the object. 716 717 The API returns NULL in case of an error. The caller is responsible 718 for decref'ing the returned objects. 719 720*/ 721 722PyAPI_FUNC(PyObject*) PyUnicode_FromObject( 723 register PyObject *obj /* Object */ 724 ); 725 726PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( 727 const char *format, /* ASCII-encoded string */ 728 va_list vargs 729 ); 730PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( 731 const char *format, /* ASCII-encoded string */ 732 ... 733 ); 734 735#ifndef Py_LIMITED_API 736/* Format the object based on the format_spec, as defined in PEP 3101 737 (Advanced String Formatting). 
*/ 738PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, 739 PyObject *format_spec, 740 Py_ssize_t start, 741 Py_ssize_t end); 742#endif 743 744PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); 745PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); 746PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( 747 const char *u /* UTF-8 encoded string */ 748 ); 749#ifndef Py_LIMITED_API 750PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void); 751#endif 752 753/* Use only if you know it's a string */ 754#define PyUnicode_CHECK_INTERNED(op) \ 755 (((PyASCIIObject *)(op))->state.interned) 756 757/* --- wchar_t support for platforms which support it --------------------- */ 758 759#ifdef HAVE_WCHAR_H 760 761/* Create a Unicode Object from the wchar_t buffer w of the given 762 size. 763 764 The buffer is copied into the new object. */ 765 766PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( 767 register const wchar_t *w, /* wchar_t buffer */ 768 Py_ssize_t size /* size of buffer */ 769 ); 770 771/* Copies the Unicode Object contents into the wchar_t buffer w. At 772 most size wchar_t characters are copied. 773 774 Note that the resulting wchar_t string may or may not be 775 0-terminated. It is the responsibility of the caller to make sure 776 that the wchar_t string is 0-terminated in case this is required by 777 the application. 778 779 Returns the number of wchar_t characters copied (excluding a 780 possibly trailing 0-termination character) or -1 in case of an 781 error. */ 782 783PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( 784 PyObject *unicode, /* Unicode object */ 785 register wchar_t *w, /* wchar_t buffer */ 786 Py_ssize_t size /* size of buffer */ 787 ); 788 789/* Convert the Unicode object to a wide character string. The output string 790 always ends with a nul character. If size is not NULL, write the number of 791 wide characters (excluding the null character) into *size. 
792 793 Returns a buffer allocated by PyMem_Alloc() (use PyMem_Free() to free it) 794 on success. On error, returns NULL, *size is undefined and raises a 795 MemoryError. */ 796 797PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( 798 PyObject *unicode, /* Unicode object */ 799 Py_ssize_t *size /* number of characters of the result */ 800 ); 801 802#ifndef Py_LIMITED_API 803PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind); 804#endif 805 806#endif 807 808/* --- Unicode ordinals --------------------------------------------------- */ 809 810/* Create a Unicode Object from the given Unicode code point ordinal. 811 812 The ordinal must be in range(0x10000) on narrow Python builds 813 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is 814 raised in case it is not. 815 816*/ 817 818PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); 819 820/* --- Free-list management ----------------------------------------------- */ 821 822/* Clear the free list used by the Unicode implementation. 823 824 This can be used to release memory used for objects on the free 825 list back to the Python memory allocator. 826 827*/ 828 829PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); 830 831/* === Builtin Codecs ===================================================== 832 833 Many of these APIs take two arguments encoding and errors. These 834 parameters encoding and errors have the same semantics as the ones 835 of the builtin str() API. 836 837 Setting encoding to NULL causes the default encoding (UTF-8) to be used. 838 839 Error handling is set by errors which may also be set to NULL 840 meaning to use the default handling defined for the codec. Default 841 error handling for all builtin codecs is "strict" (ValueErrors are 842 raised). 843 844 The codecs all use a similar interface. Only deviation from the 845 generic ones are documented. 
846 847*/ 848 849/* --- Manage the default encoding ---------------------------------------- */ 850 851/* Returns a pointer to the default encoding (UTF-8) of the 852 Unicode object unicode and the size of the encoded representation 853 in bytes stored in *size. 854 855 In case of an error, no *size is set. 856 857 This funcation caches the UTF-8 encoded string in the unicodeobject 858 and subsequent calls will return the same string. The memory is relased 859 when the unicodeobject is deallocated. 860 861 _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to 862 support the previous internal function with the same behaviour. 863 864 *** This API is for interpreter INTERNAL USE ONLY and will likely 865 *** be removed or changed in the future. 866 867 *** If you need to access the Unicode object as UTF-8 bytes string, 868 *** please use PyUnicode_AsUTF8String() instead. 869*/ 870 871#ifndef Py_LIMITED_API 872PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize( 873 PyObject *unicode, 874 Py_ssize_t *size); 875#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize 876#endif 877 878/* Returns a pointer to the default encoding (UTF-8) of the 879 Unicode object unicode. 880 881 Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation 882 in the unicodeobject. 883 884 _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to 885 support the previous internal function with the same behaviour. 886 887 Use of this API is DEPRECATED since no size information can be 888 extracted from the returned data. 889 890 *** This API is for interpreter INTERNAL USE ONLY and will likely 891 *** be removed or changed for Python 3.1. 892 893 *** If you need to access the Unicode object as UTF-8 bytes string, 894 *** please use PyUnicode_AsUTF8String() instead. 895 896*/ 897 898#ifndef Py_LIMITED_API 899PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode); 900#define _PyUnicode_AsString PyUnicode_AsUTF8 901#endif 902 903/* Returns "utf-8". 
*/ 904 905PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); 906 907/* --- Generic Codecs ----------------------------------------------------- */ 908 909/* Create a Unicode object by decoding the encoded string s of the 910 given size. */ 911 912PyAPI_FUNC(PyObject*) PyUnicode_Decode( 913 const char *s, /* encoded string */ 914 Py_ssize_t size, /* size of buffer */ 915 const char *encoding, /* encoding */ 916 const char *errors /* error handling */ 917 ); 918 919/* Decode a Unicode object unicode and return the result as Python 920 object. */ 921 922PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( 923 PyObject *unicode, /* Unicode object */ 924 const char *encoding, /* encoding */ 925 const char *errors /* error handling */ 926 ); 927 928/* Decode a Unicode object unicode and return the result as Unicode 929 object. */ 930 931PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( 932 PyObject *unicode, /* Unicode object */ 933 const char *encoding, /* encoding */ 934 const char *errors /* error handling */ 935 ); 936 937/* Encodes a Py_UNICODE buffer of the given size and returns a 938 Python string object. */ 939 940#ifndef Py_LIMITED_API 941PyAPI_FUNC(PyObject*) PyUnicode_Encode( 942 const Py_UNICODE *s, /* Unicode char buffer */ 943 Py_ssize_t size, /* number of Py_UNICODE chars to encode */ 944 const char *encoding, /* encoding */ 945 const char *errors /* error handling */ 946 ); 947#endif 948 949/* Encodes a Unicode object and returns the result as Python 950 object. */ 951 952PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( 953 PyObject *unicode, /* Unicode object */ 954 const char *encoding, /* encoding */ 955 const char *errors /* error handling */ 956 ); 957 958/* Encodes a Unicode object and returns the result as Python string 959 object. 
*/ 960 961PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( 962 PyObject *unicode, /* Unicode object */ 963 const char *encoding, /* encoding */ 964 const char *errors /* error handling */ 965 ); 966 967/* Encodes a Unicode object and returns the result as Unicode 968 object. */ 969 970PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( 971 PyObject *unicode, /* Unicode object */ 972 const char *encoding, /* encoding */ 973 const char *errors /* error handling */ 974 ); 975 976/* Build an encoding map. */ 977 978PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( 979 PyObject* string /* 256 character map */ 980 ); 981 982/* --- UTF-7 Codecs ------------------------------------------------------- */ 983 984PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( 985 const char *string, /* UTF-7 encoded string */ 986 Py_ssize_t length, /* size of string */ 987 const char *errors /* error handling */ 988 ); 989 990PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( 991 const char *string, /* UTF-7 encoded string */ 992 Py_ssize_t length, /* size of string */ 993 const char *errors, /* error handling */ 994 Py_ssize_t *consumed /* bytes consumed */ 995 ); 996 997#ifndef Py_LIMITED_API 998PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( 999 const Py_UNICODE *data, /* Unicode char buffer */ 1000 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1001 int base64SetO, /* Encode RFC2152 Set O characters in base64 */ 1002 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ 1003 const char *errors /* error handling */ 1004 ); 1005#endif 1006 1007/* --- UTF-8 Codecs ------------------------------------------------------- */ 1008 1009PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( 1010 const char *string, /* UTF-8 encoded string */ 1011 Py_ssize_t length, /* size of string */ 1012 const char *errors /* error handling */ 1013 ); 1014 1015PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( 1016 const char *string, /* UTF-8 encoded string */ 1017 Py_ssize_t length, /* size of 
string */ 1018 const char *errors, /* error handling */ 1019 Py_ssize_t *consumed /* bytes consumed */ 1020 ); 1021 1022PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( 1023 PyObject *unicode /* Unicode object */ 1024 ); 1025 1026#ifndef Py_LIMITED_API 1027PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String( 1028 PyObject *unicode, 1029 const char *errors); 1030 1031PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( 1032 const Py_UNICODE *data, /* Unicode char buffer */ 1033 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1034 const char *errors /* error handling */ 1035 ); 1036#endif 1037 1038/* --- UTF-32 Codecs ------------------------------------------------------ */ 1039 1040/* Decodes length bytes from a UTF-32 encoded buffer string and returns 1041 the corresponding Unicode object. 1042 1043 errors (if non-NULL) defines the error handling. It defaults 1044 to "strict". 1045 1046 If byteorder is non-NULL, the decoder starts decoding using the 1047 given byte order: 1048 1049 *byteorder == -1: little endian 1050 *byteorder == 0: native order 1051 *byteorder == 1: big endian 1052 1053 In native mode, the first four bytes of the stream are checked for a 1054 BOM mark. If found, the BOM mark is analysed, the byte order 1055 adjusted and the BOM skipped. In the other modes, no BOM mark 1056 interpretation is done. After completion, *byteorder is set to the 1057 current byte order at the end of input data. 1058 1059 If byteorder is NULL, the codec starts in native order mode. 
1060 1061*/ 1062 1063PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( 1064 const char *string, /* UTF-32 encoded string */ 1065 Py_ssize_t length, /* size of string */ 1066 const char *errors, /* error handling */ 1067 int *byteorder /* pointer to byteorder to use 1068 0=native;-1=LE,1=BE; updated on 1069 exit */ 1070 ); 1071 1072PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( 1073 const char *string, /* UTF-32 encoded string */ 1074 Py_ssize_t length, /* size of string */ 1075 const char *errors, /* error handling */ 1076 int *byteorder, /* pointer to byteorder to use 1077 0=native;-1=LE,1=BE; updated on 1078 exit */ 1079 Py_ssize_t *consumed /* bytes consumed */ 1080 ); 1081 1082/* Returns a Python string using the UTF-32 encoding in native byte 1083 order. The string always starts with a BOM mark. */ 1084 1085PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( 1086 PyObject *unicode /* Unicode object */ 1087 ); 1088 1089/* Returns a Python string object holding the UTF-32 encoded value of 1090 the Unicode data. 1091 1092 If byteorder is not 0, output is written according to the following 1093 byte order: 1094 1095 byteorder == -1: little endian 1096 byteorder == 0: native byte order (writes a BOM mark) 1097 byteorder == 1: big endian 1098 1099 If byteorder is 0, the output string will always start with the 1100 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 1101 prepended. 1102 1103*/ 1104 1105#ifndef Py_LIMITED_API 1106PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( 1107 const Py_UNICODE *data, /* Unicode char buffer */ 1108 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1109 const char *errors, /* error handling */ 1110 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1111 ); 1112#endif 1113 1114/* --- UTF-16 Codecs ------------------------------------------------------ */ 1115 1116/* Decodes length bytes from a UTF-16 encoded buffer string and returns 1117 the corresponding Unicode object. 
1118 1119 errors (if non-NULL) defines the error handling. It defaults 1120 to "strict". 1121 1122 If byteorder is non-NULL, the decoder starts decoding using the 1123 given byte order: 1124 1125 *byteorder == -1: little endian 1126 *byteorder == 0: native order 1127 *byteorder == 1: big endian 1128 1129 In native mode, the first two bytes of the stream are checked for a 1130 BOM mark. If found, the BOM mark is analysed, the byte order 1131 adjusted and the BOM skipped. In the other modes, no BOM mark 1132 interpretation is done. After completion, *byteorder is set to the 1133 current byte order at the end of input data. 1134 1135 If byteorder is NULL, the codec starts in native order mode. 1136 1137*/ 1138 1139PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( 1140 const char *string, /* UTF-16 encoded string */ 1141 Py_ssize_t length, /* size of string */ 1142 const char *errors, /* error handling */ 1143 int *byteorder /* pointer to byteorder to use 1144 0=native;-1=LE,1=BE; updated on 1145 exit */ 1146 ); 1147 1148PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( 1149 const char *string, /* UTF-16 encoded string */ 1150 Py_ssize_t length, /* size of string */ 1151 const char *errors, /* error handling */ 1152 int *byteorder, /* pointer to byteorder to use 1153 0=native;-1=LE,1=BE; updated on 1154 exit */ 1155 Py_ssize_t *consumed /* bytes consumed */ 1156 ); 1157 1158/* Returns a Python string using the UTF-16 encoding in native byte 1159 order. The string always starts with a BOM mark. */ 1160 1161PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( 1162 PyObject *unicode /* Unicode object */ 1163 ); 1164 1165/* Returns a Python string object holding the UTF-16 encoded value of 1166 the Unicode data. 
1167 1168 If byteorder is not 0, output is written according to the following 1169 byte order: 1170 1171 byteorder == -1: little endian 1172 byteorder == 0: native byte order (writes a BOM mark) 1173 byteorder == 1: big endian 1174 1175 If byteorder is 0, the output string will always start with the 1176 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 1177 prepended. 1178 1179 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 1180 UCS-2. This trick makes it possible to add full UTF-16 capabilities 1181 at a later point without compromising the APIs. 1182 1183*/ 1184 1185#ifndef Py_LIMITED_API 1186PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( 1187 const Py_UNICODE *data, /* Unicode char buffer */ 1188 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1189 const char *errors, /* error handling */ 1190 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1191 ); 1192#endif 1193 1194/* --- Unicode-Escape Codecs ---------------------------------------------- */ 1195 1196PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( 1197 const char *string, /* Unicode-Escape encoded string */ 1198 Py_ssize_t length, /* size of string */ 1199 const char *errors /* error handling */ 1200 ); 1201 1202PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( 1203 PyObject *unicode /* Unicode object */ 1204 ); 1205 1206#ifndef Py_LIMITED_API 1207PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( 1208 const Py_UNICODE *data, /* Unicode char buffer */ 1209 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ 1210 ); 1211#endif 1212 1213/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ 1214 1215PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( 1216 const char *string, /* Raw-Unicode-Escape encoded string */ 1217 Py_ssize_t length, /* size of string */ 1218 const char *errors /* error handling */ 1219 ); 1220 1221PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( 1222 PyObject *unicode 
/* Unicode object */ 1223 ); 1224 1225#ifndef Py_LIMITED_API 1226PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( 1227 const Py_UNICODE *data, /* Unicode char buffer */ 1228 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ 1229 ); 1230#endif 1231 1232/* --- Unicode Internal Codec --------------------------------------------- 1233 1234 Only for internal use in _codecsmodule.c */ 1235 1236#ifndef Py_LIMITED_API 1237PyObject *_PyUnicode_DecodeUnicodeInternal( 1238 const char *string, 1239 Py_ssize_t length, 1240 const char *errors 1241 ); 1242#endif 1243 1244/* --- Latin-1 Codecs ----------------------------------------------------- 1245 1246 Note: Latin-1 corresponds to the first 256 Unicode ordinals. 1247 1248*/ 1249 1250PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( 1251 const char *string, /* Latin-1 encoded string */ 1252 Py_ssize_t length, /* size of string */ 1253 const char *errors /* error handling */ 1254 ); 1255 1256PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( 1257 PyObject *unicode /* Unicode object */ 1258 ); 1259 1260#ifndef Py_LIMITED_API 1261PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String( 1262 PyObject* unicode, 1263 const char* errors); 1264 1265PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( 1266 const Py_UNICODE *data, /* Unicode char buffer */ 1267 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1268 const char *errors /* error handling */ 1269 ); 1270#endif 1271 1272/* --- ASCII Codecs ------------------------------------------------------- 1273 1274 Only 7-bit ASCII data is accepted. All other codes generate errors.
1275 1276*/ 1277 1278PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( 1279 const char *string, /* ASCII encoded string */ 1280 Py_ssize_t length, /* size of string */ 1281 const char *errors /* error handling */ 1282 ); 1283 1284PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( 1285 PyObject *unicode /* Unicode object */ 1286 ); 1287 1288#ifndef Py_LIMITED_API 1289PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString( 1290 PyObject* unicode, 1291 const char* errors); 1292 1293PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( 1294 const Py_UNICODE *data, /* Unicode char buffer */ 1295 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1296 const char *errors /* error handling */ 1297 ); 1298#endif 1299 1300/* --- Character Map Codecs ----------------------------------------------- 1301 1302 This codec uses mappings to encode and decode characters. 1303 1304 Decoding mappings must map single string characters to single 1305 Unicode characters, integers (which are then interpreted as Unicode 1306 ordinals) or None (meaning "undefined mapping" and causing an 1307 error). 1308 1309 Encoding mappings must map single Unicode characters to single 1310 string characters, integers (which are then interpreted as Latin-1 1311 ordinals) or None (meaning "undefined mapping" and causing an 1312 error). 1313 1314 If a character lookup fails with a LookupError, the character is 1315 copied as-is meaning that its ordinal value will be interpreted as 1316 Unicode or Latin-1 ordinal resp. Because of this mappings only need 1317 to contain those mappings which map characters to different code 1318 points. 
1319 1320*/ 1321 1322PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( 1323 const char *string, /* Encoded string */ 1324 Py_ssize_t length, /* size of string */ 1325 PyObject *mapping, /* character mapping 1326 (char ordinal -> unicode ordinal) */ 1327 const char *errors /* error handling */ 1328 ); 1329 1330PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( 1331 PyObject *unicode, /* Unicode object */ 1332 PyObject *mapping /* character mapping 1333 (unicode ordinal -> char ordinal) */ 1334 ); 1335 1336#ifndef Py_LIMITED_API 1337PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( 1338 const Py_UNICODE *data, /* Unicode char buffer */ 1339 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1340 PyObject *mapping, /* character mapping 1341 (unicode ordinal -> char ordinal) */ 1342 const char *errors /* error handling */ 1343 ); 1344#endif 1345 1346/* Translate a Py_UNICODE buffer of the given length by applying a 1347 character mapping table to it and return the resulting Unicode 1348 object. 1349 1350 The mapping table must map Unicode ordinal integers to Unicode 1351 ordinal integers or None (causing deletion of the character). 1352 1353 Mapping tables may be dictionaries or sequences. Unmapped character 1354 ordinals (ones which cause a LookupError) are left untouched and 1355 are copied as-is. 
1356 1357*/ 1358 1359#ifndef Py_LIMITED_API 1360PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( 1361 const Py_UNICODE *data, /* Unicode char buffer */ 1362 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1363 PyObject *table, /* Translate table */ 1364 const char *errors /* error handling */ 1365 ); 1366#endif 1367 1368#ifdef HAVE_MBCS 1369 1370/* --- MBCS codecs for Windows -------------------------------------------- */ 1371 1372PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( 1373 const char *string, /* MBCS encoded string */ 1374 Py_ssize_t length, /* size of string */ 1375 const char *errors /* error handling */ 1376 ); 1377 1378PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( 1379 const char *string, /* MBCS encoded string */ 1380 Py_ssize_t length, /* size of string */ 1381 const char *errors, /* error handling */ 1382 Py_ssize_t *consumed /* bytes consumed */ 1383 ); 1384 1385PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( 1386 PyObject *unicode /* Unicode object */ 1387 ); 1388 1389#ifndef Py_LIMITED_API 1390PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( 1391 const Py_UNICODE *data, /* Unicode char buffer */ 1392 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1393 const char *errors /* error handling */ 1394 ); 1395#endif 1396 1397#endif /* HAVE_MBCS */ 1398 1399/* --- Decimal Encoder ---------------------------------------------------- */ 1400 1401/* Takes a Unicode string holding a decimal value and writes it into 1402 an output buffer using standard ASCII digit codes. 1403 1404 The output buffer has to provide at least length+1 bytes of storage 1405 area. The output string is 0-terminated. 1406 1407 The encoder converts whitespace to ' ', decimal characters to their 1408 corresponding ASCII digit and all other Latin-1 characters except 1409 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 1410 are treated as errors. This includes embedded NULL bytes. 
1411 1412 Error handling is defined by the errors argument: 1413 1414 NULL or "strict": raise a ValueError 1415 "ignore": ignore the wrong characters (these are not copied to the 1416 output buffer) 1417 "replace": replaces illegal characters with '?' 1418 1419 Returns 0 on success, -1 on failure. 1420 1421*/ 1422 1423#ifndef Py_LIMITED_API 1424PyAPI_FUNC(int) PyUnicode_EncodeDecimal( 1425 Py_UNICODE *s, /* Unicode buffer */ 1426 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1427 char *output, /* Output buffer; must have size >= length+1 */ 1428 const char *errors /* error handling */ 1429 ); 1430#endif 1431 1432/* Transforms code points that have decimal digit property to the 1433 corresponding ASCII digit code points. 1434 1435 Returns a new Unicode string on success, NULL on failure. 1436*/ 1437 1438#ifndef Py_LIMITED_API 1439PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII( 1440 Py_UNICODE *s, /* Unicode buffer */ 1441 Py_ssize_t length /* Number of Py_UNICODE chars to transform */ 1442 ); 1443#endif 1444 1445/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyUnicodeObject 1446 as argument instead of a raw buffer and length. This function additionally 1447 transforms spaces to ASCII because this is what the callers in longobject, 1448 floatobject, and complexobject did anyways. */ 1449 1450#ifndef Py_LIMITED_API 1451PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( 1452 PyObject *unicode /* Unicode object */ 1453 ); 1454#endif 1455 1456/* --- File system encoding ---------------------------------------------- */ 1457 1458/* ParseTuple converter: encode str objects to bytes using 1459 PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ 1460 1461PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); 1462 1463/* ParseTuple converter: decode bytes objects to unicode using 1464 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is.
*/ 1465 1466PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); 1467 1468/* Decode a null-terminated string using Py_FileSystemDefaultEncoding 1469 and the "surrogateescape" error handler. 1470 1471 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1472 encoding. 1473 1474 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. 1475*/ 1476 1477PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( 1478 const char *s /* encoded string */ 1479 ); 1480 1481/* Decode a string using Py_FileSystemDefaultEncoding 1482 and the "surrogateescape" error handler. 1483 1484 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1485 encoding. 1486*/ 1487 1488PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( 1489 const char *s, /* encoded string */ 1490 Py_ssize_t size /* size */ 1491 ); 1492 1493/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the 1494 "surrogateescape" error handler, and return bytes. 1495 1496 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1497 encoding. 1498*/ 1499 1500PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( 1501 PyObject *unicode 1502 ); 1503 1504/* --- Methods & Slots ---------------------------------------------------- 1505 1506 These are capable of handling Unicode objects and strings on input 1507 (we refer to them as strings in the descriptions) and return 1508 Unicode objects or integers as appropriate. */ 1509 1510/* Concat two strings giving a new Unicode string.
*/ 1511 1512PyAPI_FUNC(PyObject*) PyUnicode_Concat( 1513 PyObject *left, /* Left string */ 1514 PyObject *right /* Right string */ 1515 ); 1516 1517/* Concat two strings and put the result in *pleft 1518 (sets *pleft to NULL on error) */ 1519 1520PyAPI_FUNC(void) PyUnicode_Append( 1521 PyObject **pleft, /* Pointer to left string */ 1522 PyObject *right /* Right string */ 1523 ); 1524 1525/* Concat two strings, put the result in *pleft and drop the right object 1526 (sets *pleft to NULL on error) */ 1527 1528PyAPI_FUNC(void) PyUnicode_AppendAndDel( 1529 PyObject **pleft, /* Pointer to left string */ 1530 PyObject *right /* Right string */ 1531 ); 1532 1533/* Split a string giving a list of Unicode strings. 1534 1535 If sep is NULL, splitting will be done at all whitespace 1536 substrings. Otherwise, splits occur at the given separator. 1537 1538 At most maxsplit splits will be done. If negative, no limit is set. 1539 1540 Separators are not included in the resulting list. 1541 1542*/ 1543 1544PyAPI_FUNC(PyObject*) PyUnicode_Split( 1545 PyObject *s, /* String to split */ 1546 PyObject *sep, /* String separator */ 1547 Py_ssize_t maxsplit /* Maxsplit count */ 1548 ); 1549 1550/* Ditto, but split at line breaks. 1551 1552 CRLF is considered to be one line break. Line breaks are not 1553 included in the resulting list. */ 1554 1555PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( 1556 PyObject *s, /* String to split */ 1557 int keepends /* If true, line end markers are included */ 1558 ); 1559 1560/* Partition a string using a given separator. */ 1561 1562PyAPI_FUNC(PyObject*) PyUnicode_Partition( 1563 PyObject *s, /* String to partition */ 1564 PyObject *sep /* String separator */ 1565 ); 1566 1567/* Partition a string using a given separator, searching from the end of the 1568 string.
*/ 1569 1570PyAPI_FUNC(PyObject*) PyUnicode_RPartition( 1571 PyObject *s, /* String to partition */ 1572 PyObject *sep /* String separator */ 1573 ); 1574 1575/* Split a string giving a list of Unicode strings. 1576 1577 If sep is NULL, splitting will be done at all whitespace 1578 substrings. Otherwise, splits occur at the given separator. 1579 1580 At most maxsplit splits will be done. But unlike PyUnicode_Split 1581 PyUnicode_RSplit splits from the end of the string. If negative, 1582 no limit is set. 1583 1584 Separators are not included in the resulting list. 1585 1586*/ 1587 1588PyAPI_FUNC(PyObject*) PyUnicode_RSplit( 1589 PyObject *s, /* String to split */ 1590 PyObject *sep, /* String separator */ 1591 Py_ssize_t maxsplit /* Maxsplit count */ 1592 ); 1593 1594/* Translate a string by applying a character mapping table to it and 1595 return the resulting Unicode object. 1596 1597 The mapping table must map Unicode ordinal integers to Unicode 1598 ordinal integers or None (causing deletion of the character). 1599 1600 Mapping tables may be dictionaries or sequences. Unmapped character 1601 ordinals (ones which cause a LookupError) are left untouched and 1602 are copied as-is. 1603 1604*/ 1605 1606PyAPI_FUNC(PyObject *) PyUnicode_Translate( 1607 PyObject *str, /* String */ 1608 PyObject *table, /* Translate table */ 1609 const char *errors /* error handling */ 1610 ); 1611 1612/* Join a sequence of strings using the given separator and return 1613 the resulting Unicode string. */ 1614 1615PyAPI_FUNC(PyObject*) PyUnicode_Join( 1616 PyObject *separator, /* Separator string */ 1617 PyObject *seq /* Sequence object */ 1618 ); 1619 1620/* Return 1 if substr matches str[start:end] at the given tail end, 0 1621 otherwise. 
*/ 1622 1623PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( 1624 PyObject *str, /* String */ 1625 PyObject *substr, /* Prefix or Suffix string */ 1626 Py_ssize_t start, /* Start index */ 1627 Py_ssize_t end, /* Stop index */ 1628 int direction /* Tail end: -1 prefix, +1 suffix */ 1629 ); 1630 1631/* Return the first position of substr in str[start:end] using the 1632 given search direction or -1 if not found. -2 is returned in case 1633 an error occurred and an exception is set. */ 1634 1635PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( 1636 PyObject *str, /* String */ 1637 PyObject *substr, /* Substring to find */ 1638 Py_ssize_t start, /* Start index */ 1639 Py_ssize_t end, /* Stop index */ 1640 int direction /* Find direction: +1 forward, -1 backward */ 1641 ); 1642 1643/* Like PyUnicode_Find, but search for single character only. */ 1644PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( 1645 PyObject *str, 1646 Py_UCS4 ch, 1647 Py_ssize_t start, 1648 Py_ssize_t end, 1649 int direction 1650 ); 1651 1652/* Count the number of occurrences of substr in str[start:end]. */ 1653 1654PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( 1655 PyObject *str, /* String */ 1656 PyObject *substr, /* Substring to count */ 1657 Py_ssize_t start, /* Start index */ 1658 Py_ssize_t end /* Stop index */ 1659 ); 1660 1661/* Replace at most maxcount occurrences of substr in str with replstr 1662 and return the resulting Unicode object. */ 1663 1664PyAPI_FUNC(PyObject *) PyUnicode_Replace( 1665 PyObject *str, /* String */ 1666 PyObject *substr, /* Substring to find */ 1667 PyObject *replstr, /* Substring to replace */ 1668 Py_ssize_t maxcount /* Max. number of replacements to apply; 1669 -1 = all */ 1670 ); 1671 1672/* Compare two strings and return -1, 0, 1 for less than, equal, 1673 greater than resp. 
*/ 1674 1675PyAPI_FUNC(int) PyUnicode_Compare( 1676 PyObject *left, /* Left string */ 1677 PyObject *right /* Right string */ 1678 ); 1679 1680PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( 1681 PyObject *left, 1682 const char *right /* ASCII-encoded string */ 1683 ); 1684 1685/* Rich compare two strings and return one of the following: 1686 1687 - NULL in case an exception was raised 1688 - Py_True or Py_False for successful comparisons 1689 - Py_NotImplemented in case the type combination is unknown 1690 1691 Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in 1692 case the conversion of the arguments to Unicode fails with a 1693 UnicodeDecodeError. 1694 1695 Possible values for op: 1696 1697 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 1698 1699*/ 1700 1701PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( 1702 PyObject *left, /* Left string */ 1703 PyObject *right, /* Right string */ 1704 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ 1705 ); 1706 1707/* Apply an argument tuple or dictionary to a format string and return 1708 the resulting Unicode string. */ 1709 1710PyAPI_FUNC(PyObject *) PyUnicode_Format( 1711 PyObject *format, /* Format string */ 1712 PyObject *args /* Argument tuple or dictionary */ 1713 ); 1714 1715/* Checks whether element is contained in container and return 1/0 1716 accordingly. 1717 1718 element has to coerce to a one-element Unicode string. -1 is 1719 returned in case of an error. */ 1720 1721PyAPI_FUNC(int) PyUnicode_Contains( 1722 PyObject *container, /* Container string */ 1723 PyObject *element /* Element string */ 1724 ); 1725 1726/* Checks whether argument is a valid identifier.
*/ 1727 1728PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); 1729 1730#ifndef Py_LIMITED_API 1731/* Externally visible for str.strip(unicode) */ 1732PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( 1733 PyUnicodeObject *self, 1734 int striptype, 1735 PyObject *sepobj 1736 ); 1737#endif 1738 1739/* Using the current locale, insert the thousands grouping 1740 into the string pointed to by buffer. For the argument descriptions, 1741 see Objects/stringlib/localeutil.h */ 1742 1743#ifndef Py_LIMITED_API 1744PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer, 1745 Py_ssize_t n_buffer, 1746 Py_UNICODE *digits, 1747 Py_ssize_t n_digits, 1748 Py_ssize_t min_width); 1749#endif 1750 1751/* Using explicit passed-in values, insert the thousands grouping 1752 into the string pointed to by buffer. For the argument descriptions, 1753 see Objects/stringlib/localeutil.h */ 1754#ifndef Py_LIMITED_API 1755PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( 1756 int kind, 1757 void *buffer, 1758 Py_ssize_t n_buffer, 1759 void *digits, 1760 Py_ssize_t n_digits, 1761 Py_ssize_t min_width, 1762 const char *grouping, 1763 const char *thousands_sep); 1764#endif 1765/* === Characters Type APIs =============================================== */ 1766 1767/* Helper array used by Py_UNICODE_ISSPACE(). */ 1768 1769#ifndef Py_LIMITED_API 1770PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; 1771 1772/* These should not be used directly. Use the Py_UNICODE_IS* and 1773 Py_UNICODE_TO* macros instead. 1774 1775 These APIs are implemented in Objects/unicodectype.c. 
1776 1777*/ 1778 1779PyAPI_FUNC(int) _PyUnicode_IsLowercase( 1780 Py_UCS4 ch /* Unicode character */ 1781 ); 1782 1783PyAPI_FUNC(int) _PyUnicode_IsUppercase( 1784 Py_UCS4 ch /* Unicode character */ 1785 ); 1786 1787PyAPI_FUNC(int) _PyUnicode_IsTitlecase( 1788 Py_UCS4 ch /* Unicode character */ 1789 ); 1790 1791PyAPI_FUNC(int) _PyUnicode_IsXidStart( 1792 Py_UCS4 ch /* Unicode character */ 1793 ); 1794 1795PyAPI_FUNC(int) _PyUnicode_IsXidContinue( 1796 Py_UCS4 ch /* Unicode character */ 1797 ); 1798 1799PyAPI_FUNC(int) _PyUnicode_IsWhitespace( 1800 const Py_UCS4 ch /* Unicode character */ 1801 ); 1802 1803PyAPI_FUNC(int) _PyUnicode_IsLinebreak( 1804 const Py_UCS4 ch /* Unicode character */ 1805 ); 1806 1807PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( 1808 Py_UCS4 ch /* Unicode character */ 1809 ); 1810 1811PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( 1812 Py_UCS4 ch /* Unicode character */ 1813 ); 1814 1815PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( 1816 Py_UCS4 ch /* Unicode character */ 1817 ); 1818 1819PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( 1820 Py_UCS4 ch /* Unicode character */ 1821 ); 1822 1823PyAPI_FUNC(int) _PyUnicode_ToDigit( 1824 Py_UCS4 ch /* Unicode character */ 1825 ); 1826 1827PyAPI_FUNC(double) _PyUnicode_ToNumeric( 1828 Py_UCS4 ch /* Unicode character */ 1829 ); 1830 1831PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( 1832 Py_UCS4 ch /* Unicode character */ 1833 ); 1834 1835PyAPI_FUNC(int) _PyUnicode_IsDigit( 1836 Py_UCS4 ch /* Unicode character */ 1837 ); 1838 1839PyAPI_FUNC(int) _PyUnicode_IsNumeric( 1840 Py_UCS4 ch /* Unicode character */ 1841 ); 1842 1843PyAPI_FUNC(int) _PyUnicode_IsPrintable( 1844 Py_UCS4 ch /* Unicode character */ 1845 ); 1846 1847PyAPI_FUNC(int) _PyUnicode_IsAlpha( 1848 Py_UCS4 ch /* Unicode character */ 1849 ); 1850 1851PyAPI_FUNC(size_t) Py_UNICODE_strlen( 1852 const Py_UNICODE *u 1853 ); 1854 1855PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy( 1856 Py_UNICODE *s1, 1857 const Py_UNICODE *s2); 1858 1859PyAPI_FUNC(Py_UNICODE*) 
Py_UNICODE_strcat( 1860 Py_UNICODE *s1, const Py_UNICODE *s2); 1861 1862PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy( 1863 Py_UNICODE *s1, 1864 const Py_UNICODE *s2, 1865 size_t n); 1866 1867PyAPI_FUNC(int) Py_UNICODE_strcmp( 1868 const Py_UNICODE *s1, 1869 const Py_UNICODE *s2 1870 ); 1871 1872PyAPI_FUNC(int) Py_UNICODE_strncmp( 1873 const Py_UNICODE *s1, 1874 const Py_UNICODE *s2, 1875 size_t n 1876 ); 1877 1878PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr( 1879 const Py_UNICODE *s, 1880 Py_UNICODE c 1881 ); 1882 1883PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr( 1884 const Py_UNICODE *s, 1885 Py_UNICODE c 1886 ); 1887 1888PyAPI_FUNC(size_t) Py_UCS4_strlen( 1889 const Py_UCS4 *u 1890 ); 1891 1892PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcpy( 1893 Py_UCS4 *s1, 1894 const Py_UCS4 *s2); 1895 1896PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcat( 1897 Py_UCS4 *s1, const Py_UCS4 *s2); 1898 1899PyAPI_FUNC(Py_UCS4*) Py_UCS4_strncpy( 1900 Py_UCS4 *s1, 1901 const Py_UCS4 *s2, 1902 size_t n); 1903 1904PyAPI_FUNC(int) Py_UCS4_strcmp( 1905 const Py_UCS4 *s1, 1906 const Py_UCS4 *s2 1907 ); 1908 1909PyAPI_FUNC(int) Py_UCS4_strncmp( 1910 const Py_UCS4 *s1, 1911 const Py_UCS4 *s2, 1912 size_t n 1913 ); 1914 1915PyAPI_FUNC(Py_UCS4*) Py_UCS4_strchr( 1916 const Py_UCS4 *s, 1917 Py_UCS4 c 1918 ); 1919 1920PyAPI_FUNC(Py_UCS4*) Py_UCS4_strrchr( 1921 const Py_UCS4 *s, 1922 Py_UCS4 c 1923 ); 1924 1925/* Create a copy of a unicode string ending with a nul character. Return NULL 1926 and raise a MemoryError exception on memory allocation failure, otherwise 1927 return a new allocated buffer (use PyMem_Free() to free the buffer). */ 1928 1929PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy( 1930 PyObject *unicode 1931 ); 1932#endif /* Py_LIMITED_API */ 1933 1934#ifdef __cplusplus 1935} 1936#endif 1937#endif /* !Py_UNICODEOBJECT_H */ 1938