unicodeobject.h revision a0702ab1fe6bda8e1cbe1d5fedc3e0ba07e299dd
1#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
3
4#include <stdarg.h>
5
6/*
7
8Unicode implementation based on original code by Fredrik Lundh,
9modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
10Unicode Integration Proposal. (See
11http://www.egenix.com/files/python/unicode-proposal.txt).
12
13Copyright (c) Corporation for National Research Initiatives.
14
15
16 Original header:
17 --------------------------------------------------------------------
18
19 * Yet another Unicode string type for Python.  This type supports the
20 * 16-bit Basic Multilingual Plane (BMP) only.
21 *
22 * Written by Fredrik Lundh, January 1999.
23 *
24 * Copyright (c) 1999 by Secret Labs AB.
25 * Copyright (c) 1999 by Fredrik Lundh.
26 *
27 * fredrik@pythonware.com
28 * http://www.pythonware.com
29 *
30 * --------------------------------------------------------------------
31 * This Unicode String Type is
32 *
33 * Copyright (c) 1999 by Secret Labs AB
34 * Copyright (c) 1999 by Fredrik Lundh
35 *
36 * By obtaining, using, and/or copying this software and/or its
37 * associated documentation, you agree that you have read, understood,
38 * and will comply with the following terms and conditions:
39 *
40 * Permission to use, copy, modify, and distribute this software and its
41 * associated documentation for any purpose and without fee is hereby
42 * granted, provided that the above copyright notice appears in all
43 * copies, and that both that copyright notice and this permission notice
44 * appear in supporting documentation, and that the name of Secret Labs
45 * AB or the author not be used in advertising or publicity pertaining to
46 * distribution of the software without specific, written prior
47 * permission.
48 *
49 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
51 * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
56 * -------------------------------------------------------------------- */
57
58#include <ctype.h>
59
60/* === Internal API ======================================================= */
61
62/* --- Internal Unicode Format -------------------------------------------- */
63
64/* Python 3.x requires unicode */
65#define Py_USING_UNICODE
66
67#ifndef SIZEOF_WCHAR_T
68#error Must define SIZEOF_WCHAR_T
69#endif
70
/* Size of one Py_UNICODE code unit; expands to SIZEOF_WCHAR_T. */
#define Py_UNICODE_SIZE SIZEOF_WCHAR_T

/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
   Otherwise, Unicode strings are stored as UCS-2 (with limited support
   for UTF-16) */

#if Py_UNICODE_SIZE >= 4
#define Py_UNICODE_WIDE
#endif
80
81/* Set these flags if the platform has "wchar.h" and the
82   wchar_t type is a 16-bit unsigned type */
83/* #define HAVE_WCHAR_H */
84/* #define HAVE_USABLE_WCHAR_T */
85
/* Py_UNICODE was the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode type.
   With PEP 393, Py_UNICODE is deprecated and replaced with a
   typedef to wchar_t. */

#ifndef Py_LIMITED_API
#define PY_UNICODE_TYPE wchar_t
typedef wchar_t Py_UNICODE;
#endif
95
96/* If the compiler provides a wchar_t type we try to support it
97   through the interface functions PyUnicode_FromWideChar(),
98   PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
99
100#ifdef HAVE_USABLE_WCHAR_T
101# ifndef HAVE_WCHAR_H
102#  define HAVE_WCHAR_H
103# endif
104#endif
105
106#if defined(MS_WINDOWS)
107#  define HAVE_MBCS
108#endif
109
110#ifdef HAVE_WCHAR_H
111/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
112# ifdef _HAVE_BSDI
113#  include <time.h>
114# endif
115#  include <wchar.h>
116#endif
117
118/* Py_UCS4 and Py_UCS2 are typdefs for the respecitve
119   unicode representations. */
120#if SIZEOF_INT >= 4
121typedef unsigned int Py_UCS4;
122#elif SIZEOF_LONG >= 4
123typedef unsigned long Py_UCS4;
124#else
125#error "Could not find a proper typedef for Py_UCS4"
126#endif
127
128typedef unsigned short Py_UCS2;
129typedef unsigned char Py_UCS1;
130
131/* --- Internal Unicode Operations ---------------------------------------- */
132
133/* Since splitting on whitespace is an important use case, and
134   whitespace in most situations is solely ASCII whitespace, we
135   optimize for the common case by using a quick look-up table
136   _Py_ascii_whitespace (see below) with an inlined check.
137
138 */
139#ifndef Py_LIMITED_API
/* Whitespace test: code points below 128 are answered from the
   _Py_ascii_whitespace table, everything else is forwarded to
   _PyUnicode_IsWhitespace().  ch is evaluated more than once. */
#define Py_UNICODE_ISSPACE(ch) \
    ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))

/* Case and line-break predicates; each forwards to the _PyUnicode_*
   function of the matching name. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversions. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Numeric value conversions. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

/* Alphanumeric: true if any of the alpha, decimal, digit or numeric
   predicates holds.  ch may be evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch) || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
168
/* Copy `length` Py_UNICODE code units from `source` to `target` using
   Py_MEMCPY; the byte count passed on is (length)*sizeof(Py_UNICODE). */
#define Py_UNICODE_COPY(target, source, length) \
    Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
171
/* Store `value` into the first `length` Py_UNICODE slots of `target`.
   Every argument is evaluated exactly once (target, then value, then
   length), so arguments with side effects behave predictably; the loop
   bound is captured up front instead of being re-evaluated per iteration. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        Py_ssize_t i_, n_ = (length);\
        for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\
    } while (0)
176
/* macros to work with surrogates

   The argument is parenthesized in every use so that expression arguments
   (e.g. `a | b`) are classified as a whole; it is still evaluated twice,
   so it must not have side effects. */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value:
   the low 10 bits of each half are combined and offset by 0x10000. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
185
/* Check if substring matches at given offset.  The offset must be
   valid, and the substring must not be empty.

   The first and the last code unit are compared before the full memcmp()
   over wstr_length code units.  All arguments are evaluated several
   times, so they must be free of side effects. */

#define Py_UNICODE_MATCH(string, offset, substring) \
    ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
     ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == \
       *((substring)->wstr + (substring)->wstr_length-1))) && \
     !memcmp((string)->wstr + (offset), (substring)->wstr, \
             (substring)->wstr_length*sizeof(Py_UNICODE)))
193
194#endif /* Py_LIMITED_API */
195
196#ifdef __cplusplus
197extern "C" {
198#endif
199
200/* --- Unicode Type ------------------------------------------------------- */
201
202#ifndef Py_LIMITED_API
203
204/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
205   structure. state.ascii and state.compact are set, and the data
206   immediately follow the structure. utf8_length and wstr_length can be found
207   in the length field; the utf8 pointer is equal to the data pointer. */
208typedef struct {
209    PyObject_HEAD
210    Py_ssize_t length;          /* Number of code points in the string */
211    Py_hash_t hash;             /* Hash value; -1 if not set */
212    struct {
213        /*
214           SSTATE_NOT_INTERNED (0)
215           SSTATE_INTERNED_MORTAL (1)
216           SSTATE_INTERNED_IMMORTAL (2)
217
218           If interned != SSTATE_NOT_INTERNED, the two references from the
219           dictionary to this object are *not* counted in ob_refcnt.
220         */
221        unsigned int interned:2;
222        /* Character size:
223
224           PyUnicode_WCHAR_KIND (0): wchar_t*
225           PyUnicode_1BYTE_KIND (1): Py_UCS1*
226           PyUnicode_2BYTE_KIND (2): Py_UCS2*
227           PyUnicode_4BYTE_KIND (3): Py_UCS4*
228         */
229        unsigned int kind:2;
230        /* Compact is with respect to the allocation scheme. Compact unicode
231           objects only require one memory block while non-compact objects use
232           one block for the PyUnicodeObject struct and another for its data
233           buffer. */
234        unsigned int compact:1;
235        /* Compact objects which are ASCII-only also have the state.compact
236           flag set, and use the PyASCIIObject struct. */
237        unsigned int ascii:1;
238        /* The ready flag indicates whether the object layout is initialized
239           completely. This means that this is either a compact object, or
240           the data pointer is filled out. The bit is redundant, and helps
241           to minimize the test in PyUnicode_IS_READY(). */
242        unsigned int ready:1;
243    } state;
244    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
245} PyASCIIObject;
246
247/* Non-ASCII strings allocated through PyUnicode_New use the
248   PyCompactUnicodeOject structure. state.compact is set, and the data
249   immediately follow the structure. */
250typedef struct {
251    PyASCIIObject _base;
252    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
253                                 * terminating \0. */
254    char *utf8;                 /* UTF-8 representation (null-terminated) */
255    Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
256                                 * surrogates count as two code points. */
257} PyCompactUnicodeObject;
258
259/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
260   PyUnicodeObject structure. The actual string data is initially in the wstr
261   block, and copied into the data block using PyUnicode_Ready. */
262typedef struct {
263    PyCompactUnicodeObject _base;
264    union {
265        void *any;
266        Py_UCS1 *latin1;
267        Py_UCS2 *ucs2;
268        Py_UCS4 *ucs4;
269    } data;                     /* Canonical, smallest-form Unicode buffer */
270} PyUnicodeObject;
271#endif
272
273PyAPI_DATA(PyTypeObject) PyUnicode_Type;
274PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
275
276#define PyUnicode_Check(op) \
277                 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
278#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
279
280/* Fast access macros */
281#ifndef Py_LIMITED_API
282
/* Length of the wstr representation in code units.  Objects with
   state.ascii set keep it in the shared `length` field; all others store
   it in PyCompactUnicodeObject.wstr_length.  `op` is parenthesized before
   each cast so that pointer expressions are cast as a whole. */
#define PyUnicode_WSTR_LENGTH(op) \
    (((PyASCIIObject*)(op))->state.ascii ?             \
     ((PyASCIIObject*)(op))->length :                  \
     ((PyCompactUnicodeObject*)(op))->wstr_length)
287
/* Returns the deprecated Py_UNICODE representation's size in code units
   (this includes surrogate pairs as 2 units).
   If the Py_UNICODE representation is not available, it will be computed
   on request.  Use PyUnicode_GET_LENGTH() for the length in code points.

   NOTE(review): the result of PyUnicode_AsUnicode() is discarded, so a
   failure there is not reported by this macro. */

#define PyUnicode_GET_SIZE(op) \
    (assert(PyUnicode_Check(op)), \
     (((PyASCIIObject *)(op))->wstr) ? \
        PyUnicode_WSTR_LENGTH(op) :                   \
        ((void)PyUnicode_AsUnicode((PyObject *)(op)), \
         PyUnicode_WSTR_LENGTH(op)))

/* PyUnicode_GET_SIZE(op) multiplied by Py_UNICODE_SIZE. */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
302
/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
   representation on demand.  Using this macro is very inefficient now,
   try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
   use PyUnicode_WRITE() and PyUnicode_READ().

   An already present wstr pointer is returned directly; otherwise the
   result of PyUnicode_AsUnicode() is returned. */

#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), \
     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
      PyUnicode_AsUnicode((PyObject *)(op)))

/* The same pointer as PyUnicode_AS_UNICODE(), viewed as const char *. */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
315
316
/* --- Flexible String Representation Helper Macros (PEP 393) ------------- */

/* Values for PyUnicodeObject.state: */

/* Interning state. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2

/* Value of the state.ascii flag.  `op` is parenthesized before the cast so
   that pointer expressions are cast as a whole. */
#define PyUnicode_IS_COMPACT_ASCII(op) (((PyASCIIObject*)(op))->state.ascii)

/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and PyUnicode_Ready()
   has not been called yet.  */
#define PyUnicode_WCHAR_KIND 0

/* Return values of the PyUnicode_KIND() macro: */

#define PyUnicode_1BYTE_KIND 1
#define PyUnicode_2BYTE_KIND 2
#define PyUnicode_4BYTE_KIND 3
338
339
/* Return the number of bytes the string uses to represent single characters,
   this can be 1, 2 or 4.  Computed as 1 << (kind - 1), so it is only
   meaningful for the 1/2/4-byte kinds, not for PyUnicode_WCHAR_KIND (0). */
#define PyUnicode_CHARACTER_SIZE(op) \
    (1 << (PyUnicode_KIND(op) - 1))

/* Return pointers to the canonical representation cast as unsigned char,
   Py_UCS2, or Py_UCS4 for direct character access.
   No checks are performed, use PyUnicode_CHARACTER_SIZE or
   PyUnicode_KIND() before to ensure these will work correctly. */

#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
353
/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)

/* Return one of the PyUnicode_*_KIND values defined above.
   Asserts that op is a unicode object and that it is ready. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)
364
/* Return a void pointer to the raw unicode buffer. */

/* Compact objects: the buffer starts right after the struct, which is
   PyASCIIObject when state.ascii is set and PyCompactUnicodeObject
   otherwise. */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (PyUnicode_IS_COMPACT_ASCII(op) ?                   \
     ((void*)((PyASCIIObject*)(op) + 1)) :              \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)))

/* Non-compact objects: the buffer is data.any, asserted to be non-NULL. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))

/* Dispatch on state.compact to one of the two helpers above. */
#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
379
/* UTF-8 pointer: when state.ascii is set this is the memory directly
   after the PyASCIIObject struct, otherwise the utf8 member. */
#define _PyUnicode_UTF8(op)                     \
    (PyUnicode_IS_COMPACT_ASCII(op) ?           \
     ((char*)((PyASCIIObject*)(op) + 1)) :      \
     ((PyCompactUnicodeObject*)(op))->utf8)

/* UTF-8 length: the shared length field when state.ascii is set,
   otherwise the utf8_length member. */
#define _PyUnicode_UTF8_LENGTH(op)                      \
    (PyUnicode_IS_COMPACT_ASCII(op) ?                   \
     ((PyASCIIObject*)(op))->length :                   \
     ((PyCompactUnicodeObject*)(op))->utf8_length)
389
/* Compute (index * char_size) where char_size is 2 ** (kind - 1).

   The index is a character index, the result is a size in bytes.
   Implemented as a left shift of index by (kind - 1). */
#define PyUnicode_KIND_SIZE(kind, index) ((index) << ((kind) - 1))
394
/* In the access macros below, "kind" may be evaluated more than once.
   All other macro parameters are evaluated exactly once, so it is safe
   to put side effects into them (such as increasing the index). */

/* Write into the canonical representation, this macro does not do any sanity
   checks and is intended for usage in loops.  The caller should cache the
   kind and data pointers obtained from other macro calls.
   index is the index in the string (starts at 0) and value is the new
   code point value which should be written to that location.
   The value is cast to the element type of the selected kind; any kind
   other than 1BYTE/2BYTE is asserted to be PyUnicode_4BYTE_KIND. */
#define PyUnicode_WRITE(kind, data, index, value) \
    do { \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: { \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
            break; \
        } \
        case PyUnicode_2BYTE_KIND: { \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
            break; \
        } \
        default: { \
            assert((kind) == PyUnicode_4BYTE_KIND); \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
        } \
        } \
    } while (0)
421
/* Read a code point from the string's canonical representation.  No checks
   or ready calls are performed.  Any kind other than 1BYTE/2BYTE is read
   as Py_UCS4; the result is always converted to Py_UCS4. */
#define PyUnicode_READ(kind, data, index) \
    ((Py_UCS4) \
    ((kind) == PyUnicode_1BYTE_KIND ? \
        ((const Py_UCS1 *)(data))[(index)] : \
        ((kind) == PyUnicode_2BYTE_KIND ? \
            ((const Py_UCS2 *)(data))[(index)] : \
            ((const Py_UCS4 *)(data))[(index)] \
        ) \
    ))
433
/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
   calls PyUnicode_KIND() and might call it twice.  For single reads, use
   PyUnicode_READ_CHAR, for multiple consecutive reads callers should
   cache kind and use PyUnicode_READ instead.
   `unicode` is evaluated more than once; `index` exactly once. */
#define PyUnicode_READ_CHAR(unicode, index) \
    ((Py_UCS4) \
    (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
        ((const unsigned char *)(PyUnicode_DATA((unicode))))[(index)] : \
        (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
            ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
            ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
        ) \
    ))
447
/* Returns the length of the unicode string. The caller has to make sure that
   the string has its canonical representation set before calling
   this macro.  Call PyUnicode_(FAST_)Ready to ensure that. */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->length)


/* Fast check to determine whether an object is ready. Equivalent to
   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any).
   `op` is parenthesized before the cast so that pointer expressions are
   cast as a whole. */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
461
/* PyUnicode_READY() does less work than PyUnicode_Ready() in the best
   case.  If the canonical representation is not yet set, it will still call
   PyUnicode_Ready().
   Returns 0 on success and -1 on errors.
   An object whose ready flag is already set yields 0 without any call. */
#define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (PyUnicode_IS_READY(op) ?                          \
      0 : _PyUnicode_Ready((PyUnicodeObject *)(op))))
470
/* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
   than iterating over the string.

   Result by case: 0x7f for state.ascii objects and for 1-byte strings
   whose data pointer equals their utf8 pointer, 0xff for other 1-byte
   strings, 0xffff for 2-byte strings and 0x10ffff otherwise. */
#define PyUnicode_MAX_CHAR_VALUE(op) \
    (assert(PyUnicode_IS_READY(op)),                                    \
     (PyUnicode_IS_COMPACT_ASCII(op) ? (0x7fU) :                        \
      (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
       (PyUnicode_DATA(op) == (((PyCompactUnicodeObject *)(op))->utf8) ? \
        (0x7fU) : (0xffU)                                               \
           ) :                                                          \
       (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
        (0xffffU) : (0x10ffffU)                                         \
           ))))
484
485#endif
486
487/* --- Constants ---------------------------------------------------------- */
488
/* This Unicode character will be used as replacement character during
   decoding if the errors argument is set to "replace". Note: the
   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0. */

#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
495
496/* === Public API ========================================================= */
497
498/* --- Plain Py_UNICODE --------------------------------------------------- */
499
500/* With PEP 393, this is the recommended way to allocate a new unicode object.
501   This function will allocate the object and its buffer in a single memory
502   block.  Objects created using this function are not resizable. */
503#ifndef Py_LIMITED_API
504PyAPI_FUNC(PyObject*) PyUnicode_New(
505    Py_ssize_t size,            /* Number of code points in the new string */
506    Py_UCS4 maxchar             /* maximum code point value in the string */
507    );
508#endif
509
510/* Initializes the canonical string representation from a the deprected
511   wstr/Py_UNICODE representation.  This function is used to convert
512   unicode objects which were created using the old API to the new flexible
513   format introduced with PEP 393.  The PyUnicode_READY() macro can be
514   more efficient if the string is already ready. */
515#ifndef Py_LIMITED_API
516PyAPI_FUNC(int) _PyUnicode_Ready(
517    PyUnicodeObject *unicode    /* Unicode object */
518    );
519#endif
520
521/* Copy character from one unicode object into another, this function performs
522   character conversion when necessary and falls back to memcpy if possible.
523
524   Fail if to is too small (smaller than how_many or smaller than
525   len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
526   kind(to), or if to has more than 1 reference.
527
528   Return the number of written character, or return -1 and raise an exception
529   on error.
530
531   Pseudo-code:
532
533       how_many = min(how_many, len(from) - from_start)
534       to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
535       return how_many
536
537   Note: The function doesn't write a terminating null character.
538   */
539#ifndef Py_LIMITED_API
540PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
541    PyObject *to,
542    Py_ssize_t to_start,
543    PyObject *from,
544    Py_ssize_t from_start,
545    Py_ssize_t how_many
546    );
547#endif
548
549/* Create a Unicode Object from the Py_UNICODE buffer u of the given
550   size.
551
552   u may be NULL which causes the contents to be undefined. It is the
553   user's responsibility to fill in the needed data afterwards. Note
554   that modifying the Unicode object contents after construction is
555   only allowed if u was set to NULL.
556
557   The buffer is copied into the new object. */
558
559#ifndef Py_LIMITED_API
560PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
561    const Py_UNICODE *u,        /* Unicode buffer */
562    Py_ssize_t size             /* size of buffer */
563    );
564#endif
565
566/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
567PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
568    const char *u,             /* UTF-8 encoded string */
569    Py_ssize_t size            /* size of buffer */
570    );
571
572/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
573   UTF-8 encoded bytes.  The size is determined with strlen(). */
574PyAPI_FUNC(PyObject*) PyUnicode_FromString(
575    const char *u              /* UTF-8 encoded string */
576    );
577
578#ifndef Py_LIMITED_API
579PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
580    int kind,
581    const void *buffer,
582    Py_ssize_t size);
583#endif
584
585PyAPI_FUNC(PyObject*) PyUnicode_Substring(
586    PyObject *str,
587    Py_ssize_t start,
588    Py_ssize_t end);
589
590/* Copy the string into a UCS4 buffer including the null character is copy_null
591   is set. Return NULL and raise an exception on error. Raise a ValueError if
592   the buffer is smaller than the string. Return buffer on success.
593
594   buflen is the length of the buffer in (Py_UCS4) characters. */
595PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
596    PyObject *unicode,
597    Py_UCS4* buffer,
598    Py_ssize_t buflen,
599    int copy_null);
600
601/* Copy the string into a UCS4 buffer. A new buffer is allocated using
602 * PyMem_Malloc; if this fails, NULL is returned with a memory error
603   exception set. */
604PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
605
606/* Return a read-only pointer to the Unicode object's internal
607   Py_UNICODE buffer.
608   If the wchar_t/Py_UNICODE representation is not yet available, this
609   function will calculate it. */
610
611#ifndef Py_LIMITED_API
612PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
613    PyObject *unicode           /* Unicode object */
614    );
615#endif
616
617/* Return a read-only pointer to the Unicode object's internal
618   Py_UNICODE buffer and save the length at size.
619   If the wchar_t/Py_UNICODE representation is not yet available, this
620   function will calculate it. */
621
622#ifndef Py_LIMITED_API
623PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
624    PyObject *unicode,          /* Unicode object */
625    Py_ssize_t *size            /* location where to save the length */
626    );
627#endif
628
629/* Get the length of the Unicode object. */
630
631PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
632    PyObject *unicode
633);
634
635/* Get the number of Py_UNICODE units in the
636   string representation. */
637
638PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
639    PyObject *unicode           /* Unicode object */
640    );
641
642/* Read a character from the string. */
643
644PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
645    PyObject *unicode,
646    Py_ssize_t index
647    );
648
649/* Write a character to the string. The string must have been created through
650   PyUnicode_New, must not be shared, and must not have been hashed yet. */
651
652PyAPI_FUNC(int) PyUnicode_WriteChar(
653    PyObject *unicode,
654    Py_ssize_t index,
655    Py_UCS4 character
656    );
657
658#ifndef Py_LIMITED_API
659/* Get the maximum ordinal for a Unicode character. */
660PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
661#endif
662
663/* Resize an already allocated Unicode object to the new size length.
664
665   *unicode is modified to point to the new (resized) object and 0
666   returned on success.
667
668   This API may only be called by the function which also called the
669   Unicode constructor. The refcount on the object must be 1. Otherwise,
670   an error is returned.
671
672   Error handling is implemented as follows: an exception is set, -1
673   is returned and *unicode left untouched.
674
675*/
676
677PyAPI_FUNC(int) PyUnicode_Resize(
678    PyObject **unicode,         /* Pointer to the Unicode object */
679    Py_ssize_t length           /* New length */
680    );
681
682/* Coerce obj to an Unicode object and return a reference with
683   *incremented* refcount.
684
685   Coercion is done in the following way:
686
687   1. bytes, bytearray and other char buffer compatible objects are decoded
688      under the assumptions that they contain data using the UTF-8
689      encoding. Decoding is done in "strict" mode.
690
691   2. All other objects (including Unicode objects) raise an
692      exception.
693
694   The API returns NULL in case of an error. The caller is responsible
695   for decref'ing the returned objects.
696
697*/
698
699PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
700    register PyObject *obj,     /* Object */
701    const char *encoding,       /* encoding */
702    const char *errors          /* error handling */
703    );
704
705/* Coerce obj to an Unicode object and return a reference with
706   *incremented* refcount.
707
708   Unicode objects are passed back as-is (subclasses are converted to
709   true Unicode objects), all other objects are delegated to
710   PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
711   using UTF-8 encoding as basis for decoding the object.
712
713   The API returns NULL in case of an error. The caller is responsible
714   for decref'ing the returned objects.
715
716*/
717
718PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
719    register PyObject *obj      /* Object */
720    );
721
722PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
723    const char *format,   /* ASCII-encoded string  */
724    va_list vargs
725    );
726PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
727    const char *format,   /* ASCII-encoded string  */
728    ...
729    );
730
731#ifndef Py_LIMITED_API
732/* Format the object based on the format_spec, as defined in PEP 3101
733   (Advanced String Formatting). */
734PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
735                                                 PyObject *format_spec,
736                                                 Py_ssize_t start,
737                                                 Py_ssize_t end);
738#endif
739
740PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
741PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
742PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
743    const char *u              /* UTF-8 encoded string */
744    );
745#ifndef Py_LIMITED_API
746PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
747#endif
748
749/* Use only if you know it's a string */
750#define PyUnicode_CHECK_INTERNED(op) \
751    (((PyASCIIObject *)(op))->state.interned)
752
753/* --- wchar_t support for platforms which support it --------------------- */
754
#ifdef HAVE_WCHAR_H

/* Create a Unicode Object from the wchar_t buffer w of the given
   size.

   The buffer is copied into the new object. */

PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
    const wchar_t *w,           /* wchar_t buffer */
    Py_ssize_t size             /* size of buffer */
    );

/* Copies the Unicode Object contents into the wchar_t buffer w.  At
   most size wchar_t characters are copied.

   Note that the resulting wchar_t string may or may not be
   0-terminated.  It is the responsibility of the caller to make sure
   that the wchar_t string is 0-terminated in case this is required by
   the application.

   Returns the number of wchar_t characters copied (excluding a
   possibly trailing 0-termination character) or -1 in case of an
   error. */

PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
    PyObject *unicode,          /* Unicode object */
    wchar_t *w,                 /* wchar_t buffer */
    Py_ssize_t size             /* size of buffer */
    );

/* Convert the Unicode object to a wide character string. The output string
   always ends with a nul character. If size is not NULL, write the number of
   wide characters (excluding the null character) into *size.

   Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
   on success. On error, returns NULL, *size is undefined and raises a
   MemoryError. */

PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
    PyObject *unicode,          /* Unicode object */
    Py_ssize_t *size            /* number of characters of the result */
    );

/* NOTE(review): private helper that is declared only when HAVE_WCHAR_H is
   defined, although its signature does not involve wchar_t -- confirm the
   guard is intended. */
PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);

#endif
801
802/* --- Unicode ordinals --------------------------------------------------- */
803
804/* Create a Unicode Object from the given Unicode code point ordinal.
805
806   The ordinal must be in range(0x10000) on narrow Python builds
807   (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
808   raised in case it is not.
809
810*/
811
812PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
813
814/* --- Free-list management ----------------------------------------------- */
815
816/* Clear the free list used by the Unicode implementation.
817
818   This can be used to release memory used for objects on the free
819   list back to the Python memory allocator.
820
821*/
822
823PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
824
825/* === Builtin Codecs =====================================================
826
827   Many of these APIs take two arguments encoding and errors. These
828   parameters encoding and errors have the same semantics as the ones
829   of the builtin str() API.
830
831   Setting encoding to NULL causes the default encoding (UTF-8) to be used.
832
833   Error handling is set by errors which may also be set to NULL
834   meaning to use the default handling defined for the codec. Default
835   error handling for all builtin codecs is "strict" (ValueErrors are
836   raised).
837
838   The codecs all use a similar interface. Only deviations from the
839   generic ones are documented.
840
841*/
842
843/* --- Manage the default encoding ---------------------------------------- */
844
845/* Returns a pointer to the default encoding (UTF-8) of the
846   Unicode object unicode and the size of the encoded representation
847   in bytes stored in *size.
848
849   In case of an error, no *size is set.
850
851   This function caches the UTF-8 encoded string in the unicodeobject
852   and subsequent calls will return the same string.  The memory is released
853   when the unicodeobject is deallocated.
854
855   _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
856   support the previous internal function with the same behaviour.
857
858   *** This API is for interpreter INTERNAL USE ONLY and will likely
859   *** be removed or changed in the future.
860
861   *** If you need to access the Unicode object as UTF-8 bytes string,
862   *** please use PyUnicode_AsUTF8String() instead.
863*/
864
865#ifndef Py_LIMITED_API
866PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
867    PyObject *unicode,          /* Unicode object */
868    Py_ssize_t *size);          /* receives the size in bytes of the UTF-8 data */
869#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
870#endif
871
872/* Returns a pointer to the default encoding (UTF-8) of the
873   Unicode object unicode.
874
875   Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
876   in the unicodeobject.
877
878   _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
879   support the previous internal function with the same behaviour.
880
881   Use of this API is DEPRECATED since no size information can be
882   extracted from the returned data.
883
884   *** This API is for interpreter INTERNAL USE ONLY and will likely
885   *** be removed or changed in the future.
886
887   *** If you need to access the Unicode object as UTF-8 bytes string,
888   *** please use PyUnicode_AsUTF8String() instead.
889
890*/
891
892#ifndef Py_LIMITED_API
893PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
894#define _PyUnicode_AsString PyUnicode_AsUTF8
895#endif
896
897/* Returns "utf-8".  */
898
899PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
900
901/* --- Generic Codecs ----------------------------------------------------- */
902
903/* Create a Unicode object by decoding the encoded string s of the
904   given size. */
905
906PyAPI_FUNC(PyObject*) PyUnicode_Decode(
907    const char *s,              /* encoded string */
908    Py_ssize_t size,            /* size of buffer */
909    const char *encoding,       /* encoding */
910    const char *errors          /* error handling */
911    );
912
913/* Decode a Unicode object unicode and return the result as Python
914   object. */
915
916PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
917    PyObject *unicode,          /* Unicode object */
918    const char *encoding,       /* encoding */
919    const char *errors          /* error handling */
920    );
921
922/* Decode a Unicode object unicode and return the result as Unicode
923   object. */
924
925PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
926    PyObject *unicode,          /* Unicode object */
927    const char *encoding,       /* encoding */
928    const char *errors          /* error handling */
929    );
930
931/* Encodes a Py_UNICODE buffer of the given size and returns a
932   Python string object. */
933
934#ifndef Py_LIMITED_API
935PyAPI_FUNC(PyObject*) PyUnicode_Encode(
936    const Py_UNICODE *s,        /* Unicode char buffer */
937    Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
938    const char *encoding,       /* encoding */
939    const char *errors          /* error handling */
940    );
941#endif
942
943/* Encodes a Unicode object and returns the result as Python
944   object. */
945
946PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
947    PyObject *unicode,          /* Unicode object */
948    const char *encoding,       /* encoding */
949    const char *errors          /* error handling */
950    );
951
952/* Encodes a Unicode object and returns the result as Python string
953   object. */
954
955PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
956    PyObject *unicode,          /* Unicode object */
957    const char *encoding,       /* encoding */
958    const char *errors          /* error handling */
959    );
960
961/* Encodes a Unicode object and returns the result as Unicode
962   object. */
963
964PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
965    PyObject *unicode,          /* Unicode object */
966    const char *encoding,       /* encoding */
967    const char *errors          /* error handling */
968    );
969
970/* Build an encoding map. */
971
972PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
973    PyObject* string            /* 256 character map */
974   );
975
976/* --- UTF-7 Codecs ------------------------------------------------------- */
977
978PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
979    const char *string,         /* UTF-7 encoded string */
980    Py_ssize_t length,          /* size of string */
981    const char *errors          /* error handling */
982    );
983
984PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
985    const char *string,         /* UTF-7 encoded string */
986    Py_ssize_t length,          /* size of string */
987    const char *errors,         /* error handling */
988    Py_ssize_t *consumed        /* bytes consumed */
989    );
990
991#ifndef Py_LIMITED_API
992PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
993    const Py_UNICODE *data,     /* Unicode char buffer */
994    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
995    int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
996    int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
997    const char *errors          /* error handling */
998    );
999#endif
1000
1001/* --- UTF-8 Codecs ------------------------------------------------------- */
1002
1003PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
1004    const char *string,         /* UTF-8 encoded string */
1005    Py_ssize_t length,          /* size of string */
1006    const char *errors          /* error handling */
1007    );
1008
1009PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
1010    const char *string,         /* UTF-8 encoded string */
1011    Py_ssize_t length,          /* size of string */
1012    const char *errors,         /* error handling */
1013    Py_ssize_t *consumed        /* bytes consumed */
1014    );
1015
1016PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
1017    PyObject *unicode           /* Unicode object */
1018    );
1019
1020#ifndef Py_LIMITED_API
/* NOTE(review): presumably PyUnicode_AsUTF8String() with an explicit
   error handler; not documented in this header -- confirm against the
   definition. */
1021PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
1022    PyObject *unicode,          /* Unicode object */
1023    const char *errors);        /* error handling */
1024
1025PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
1026    const Py_UNICODE *data,     /* Unicode char buffer */
1027    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1028    const char *errors          /* error handling */
1029    );
1030#endif
1031
1032/* --- UTF-32 Codecs ------------------------------------------------------ */
1033
1034/* Decodes length bytes from a UTF-32 encoded buffer string and returns
1035   the corresponding Unicode object.
1036
1037   errors (if non-NULL) defines the error handling. It defaults
1038   to "strict".
1039
1040   If byteorder is non-NULL, the decoder starts decoding using the
1041   given byte order:
1042
1043    *byteorder == -1: little endian
1044    *byteorder == 0:  native order
1045    *byteorder == 1:  big endian
1046
1047   In native mode, the first four bytes of the stream are checked for a
1048   BOM mark. If found, the BOM mark is analysed, the byte order
1049   adjusted and the BOM skipped.  In the other modes, no BOM mark
1050   interpretation is done. After completion, *byteorder is set to the
1051   current byte order at the end of input data.
1052
1053   If byteorder is NULL, the codec starts in native order mode.
1054
1055*/
1056
1057PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
1058    const char *string,         /* UTF-32 encoded string */
1059    Py_ssize_t length,          /* size of string */
1060    const char *errors,         /* error handling */
1061    int *byteorder              /* pointer to byteorder to use
1062                                   0=native;-1=LE,1=BE; updated on
1063                                   exit */
1064    );
1065
1066PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
1067    const char *string,         /* UTF-32 encoded string */
1068    Py_ssize_t length,          /* size of string */
1069    const char *errors,         /* error handling */
1070    int *byteorder,             /* pointer to byteorder to use
1071                                   0=native;-1=LE,1=BE; updated on
1072                                   exit */
1073    Py_ssize_t *consumed        /* bytes consumed */
1074    );
1075
1076/* Returns a Python string using the UTF-32 encoding in native byte
1077   order. The string always starts with a BOM mark.  */
1078
1079PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
1080    PyObject *unicode           /* Unicode object */
1081    );
1082
1083/* Returns a Python string object holding the UTF-32 encoded value of
1084   the Unicode data.
1085
1086   If byteorder is not 0, output is written according to the following
1087   byte order:
1088
1089   byteorder == -1: little endian
1090   byteorder == 0:  native byte order (writes a BOM mark)
1091   byteorder == 1:  big endian
1092
1093   If byteorder is 0, the output string will always start with the
1094   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1095   prepended.
1096
1097*/
1098
1099#ifndef Py_LIMITED_API
1100PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
1101    const Py_UNICODE *data,     /* Unicode char buffer */
1102    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1103    const char *errors,         /* error handling */
1104    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1105    );
1106#endif
1107
1108/* --- UTF-16 Codecs ------------------------------------------------------ */
1109
1110/* Decodes length bytes from a UTF-16 encoded buffer string and returns
1111   the corresponding Unicode object.
1112
1113   errors (if non-NULL) defines the error handling. It defaults
1114   to "strict".
1115
1116   If byteorder is non-NULL, the decoder starts decoding using the
1117   given byte order:
1118
1119    *byteorder == -1: little endian
1120    *byteorder == 0:  native order
1121    *byteorder == 1:  big endian
1122
1123   In native mode, the first two bytes of the stream are checked for a
1124   BOM mark. If found, the BOM mark is analysed, the byte order
1125   adjusted and the BOM skipped.  In the other modes, no BOM mark
1126   interpretation is done. After completion, *byteorder is set to the
1127   current byte order at the end of input data.
1128
1129   If byteorder is NULL, the codec starts in native order mode.
1130
1131*/
1132
1133PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
1134    const char *string,         /* UTF-16 encoded string */
1135    Py_ssize_t length,          /* size of string */
1136    const char *errors,         /* error handling */
1137    int *byteorder              /* pointer to byteorder to use
1138                                   0=native;-1=LE,1=BE; updated on
1139                                   exit */
1140    );
1141
1142PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
1143    const char *string,         /* UTF-16 encoded string */
1144    Py_ssize_t length,          /* size of string */
1145    const char *errors,         /* error handling */
1146    int *byteorder,             /* pointer to byteorder to use
1147                                   0=native;-1=LE,1=BE; updated on
1148                                   exit */
1149    Py_ssize_t *consumed        /* bytes consumed */
1150    );
1151
1152/* Returns a Python string using the UTF-16 encoding in native byte
1153   order. The string always starts with a BOM mark.  */
1154
1155PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
1156    PyObject *unicode           /* Unicode object */
1157    );
1158
1159/* Returns a Python string object holding the UTF-16 encoded value of
1160   the Unicode data.
1161
1162   If byteorder is not 0, output is written according to the following
1163   byte order:
1164
1165   byteorder == -1: little endian
1166   byteorder == 0:  native byte order (writes a BOM mark)
1167   byteorder == 1:  big endian
1168
1169   If byteorder is 0, the output string will always start with the
1170   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1171   prepended.
1172
1173   Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
1174   UCS-2. This trick makes it possible to add full UTF-16 capabilities
1175   at a later point without compromising the APIs.
1176
1177*/
1178
1179#ifndef Py_LIMITED_API
1180PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
1181    const Py_UNICODE *data,     /* Unicode char buffer */
1182    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1183    const char *errors,         /* error handling */
1184    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1185    );
1186#endif
1187
1188/* --- Unicode-Escape Codecs ---------------------------------------------- */
1189
1190PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
1191    const char *string,         /* Unicode-Escape encoded string */
1192    Py_ssize_t length,          /* size of string */
1193    const char *errors          /* error handling */
1194    );
1195
1196PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
1197    PyObject *unicode           /* Unicode object */
1198    );
1199
1200#ifndef Py_LIMITED_API
1201PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
1202    const Py_UNICODE *data,     /* Unicode char buffer */
1203    Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1204    );
1205#endif
1206
1207/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
1208
1209PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
1210    const char *string,         /* Raw-Unicode-Escape encoded string */
1211    Py_ssize_t length,          /* size of string */
1212    const char *errors          /* error handling */
1213    );
1214
1215PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
1216    PyObject *unicode           /* Unicode object */
1217    );
1218
1219#ifndef Py_LIMITED_API
1220PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
1221    const Py_UNICODE *data,     /* Unicode char buffer */
1222    Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1223    );
1224#endif
1225
1226/* --- Unicode Internal Codec ---------------------------------------------
1227
1228    Only for internal use in _codecsmodule.c */
1229
1230#ifndef Py_LIMITED_API
/* NOTE(review): declared without PyAPI_FUNC, unlike the other codec
   entry points in this header -- presumably intentional because the
   only intended caller is _codecsmodule.c; confirm before relying on
   this symbol from a separately linked module. */
1231PyObject *_PyUnicode_DecodeUnicodeInternal(
1232    const char *string,         /* encoded string */
1233    Py_ssize_t length,          /* size of string */
1234    const char *errors          /* error handling */
1235    );
1236#endif
1237
1238/* --- Latin-1 Codecs -----------------------------------------------------
1239
1240   Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1241
1242*/
1243
1244PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
1245    const char *string,         /* Latin-1 encoded string */
1246    Py_ssize_t length,          /* size of string */
1247    const char *errors          /* error handling */
1248    );
1249
1250PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
1251    PyObject *unicode           /* Unicode object */
1252    );
1253
1254#ifndef Py_LIMITED_API
/* NOTE(review): presumably PyUnicode_AsLatin1String() with an explicit
   error handler; not documented in this header -- confirm against the
   definition. */
1255PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
1256    PyObject* unicode,          /* Unicode object */
1257    const char* errors);        /* error handling */
1258
1259PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
1260    const Py_UNICODE *data,     /* Unicode char buffer */
1261    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1262    const char *errors          /* error handling */
1263    );
1264#endif
1265
1266/* --- ASCII Codecs -------------------------------------------------------
1267
1268   Only 7-bit ASCII data is accepted. All other codes generate errors.
1269
1270*/
1271
1272PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
1273    const char *string,         /* ASCII encoded string */
1274    Py_ssize_t length,          /* size of string */
1275    const char *errors          /* error handling */
1276    );
1277
1278PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
1279    PyObject *unicode           /* Unicode object */
1280    );
1281
1282#ifndef Py_LIMITED_API
/* NOTE(review): presumably PyUnicode_AsASCIIString() with an explicit
   error handler; not documented in this header -- confirm against the
   definition. */
1283PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
1284    PyObject* unicode,          /* Unicode object */
1285    const char* errors);        /* error handling */
1286
1287PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
1288    const Py_UNICODE *data,     /* Unicode char buffer */
1289    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1290    const char *errors          /* error handling */
1291    );
1292#endif
1293
1294/* --- Character Map Codecs -----------------------------------------------
1295
1296   This codec uses mappings to encode and decode characters.
1297
1298   Decoding mappings must map single string characters to single
1299   Unicode characters, integers (which are then interpreted as Unicode
1300   ordinals) or None (meaning "undefined mapping" and causing an
1301   error).
1302
1303   Encoding mappings must map single Unicode characters to single
1304   string characters, integers (which are then interpreted as Latin-1
1305   ordinals) or None (meaning "undefined mapping" and causing an
1306   error).
1307
1308   If a character lookup fails with a LookupError, the character is
1309   copied as-is meaning that its ordinal value will be interpreted as
1310   Unicode or Latin-1 ordinal resp. Because of this mappings only need
1311   to contain those mappings which map characters to different code
1312   points.
1313
1314*/
1315
1316PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
1317    const char *string,         /* Encoded string */
1318    Py_ssize_t length,          /* size of string */
1319    PyObject *mapping,          /* character mapping
1320                                   (char ordinal -> unicode ordinal) */
1321    const char *errors          /* error handling */
1322    );
1323
1324PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
1325    PyObject *unicode,          /* Unicode object */
1326    PyObject *mapping           /* character mapping
1327                                   (unicode ordinal -> char ordinal) */
1328    );
1329
1330#ifndef Py_LIMITED_API
1331PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
1332    const Py_UNICODE *data,     /* Unicode char buffer */
1333    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1334    PyObject *mapping,          /* character mapping
1335                                   (unicode ordinal -> char ordinal) */
1336    const char *errors          /* error handling */
1337    );
1338#endif
1339
1340/* Translate a Py_UNICODE buffer of the given length by applying a
1341   character mapping table to it and return the resulting Unicode
1342   object.
1343
1344   The mapping table must map Unicode ordinal integers to Unicode
1345   ordinal integers or None (causing deletion of the character).
1346
1347   Mapping tables may be dictionaries or sequences. Unmapped character
1348   ordinals (ones which cause a LookupError) are left untouched and
1349   are copied as-is.
1350
1351*/
1352
1353#ifndef Py_LIMITED_API
1354PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
1355    const Py_UNICODE *data,     /* Unicode char buffer */
1356    Py_ssize_t length,          /* Number of Py_UNICODE chars to translate */
1357    PyObject *table,            /* Translate table */
1358    const char *errors          /* error handling */
1359    );
1360#endif
1361
1362#ifdef HAVE_MBCS
1363
1364/* --- MBCS codecs for Windows -------------------------------------------- */
1365
1366PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
1367    const char *string,         /* MBCS encoded string */
1368    Py_ssize_t length,              /* size of string */
1369    const char *errors          /* error handling */
1370    );
1371
1372PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1373    const char *string,         /* MBCS encoded string */
1374    Py_ssize_t length,          /* size of string */
1375    const char *errors,         /* error handling */
1376    Py_ssize_t *consumed        /* bytes consumed */
1377    );
1378
1379PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
1380    PyObject *unicode           /* Unicode object */
1381    );
1382
1383#ifndef Py_LIMITED_API
1384PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
1385    const Py_UNICODE *data,     /* Unicode char buffer */
1386    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1387    const char *errors          /* error handling */
1388    );
1389#endif
1390
1391#endif /* HAVE_MBCS */
1392
1393/* --- Decimal Encoder ---------------------------------------------------- */
1394
1395/* Takes a Unicode string holding a decimal value and writes it into
1396   an output buffer using standard ASCII digit codes.
1397
1398   The output buffer has to provide at least length+1 bytes of storage
1399   area. The output string is 0-terminated.
1400
1401   The encoder converts whitespace to ' ', decimal characters to their
1402   corresponding ASCII digit and all other Latin-1 characters except
1403   \0 as-is. Characters outside this range (Unicode ordinals 1-255)
1404   are treated as errors. This includes embedded NUL characters.
1405
1406   Error handling is defined by the errors argument:
1407
1408      NULL or "strict": raise a ValueError
1409      "ignore": ignore the wrong characters (these are not copied to the
1410                output buffer)
1411      "replace": replaces illegal characters with '?'
1412
1413   Returns 0 on success, -1 on failure.
1414
1415*/
1416
1417#ifndef Py_LIMITED_API
1418PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
1419    Py_UNICODE *s,              /* Unicode buffer */
1420    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1421    char *output,               /* Output buffer; must have size >= length+1
                                   (room for the 0-terminator) */
1422    const char *errors          /* error handling */
1423    );
1424#endif
1425
1426/* Transforms code points that have decimal digit property to the
1427   corresponding ASCII digit code points.
1428
1429   Returns a new Unicode string on success, NULL on failure.
1430*/
1431
1432#ifndef Py_LIMITED_API
1433PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
1434    Py_UNICODE *s,              /* Unicode buffer */
1435    Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
1436    );
1437#endif
1438
1439/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyUnicodeObject
1440   as argument instead of a raw buffer and length.  This function additionally
1441   transforms spaces to ASCII because this is what the callers in longobject,
1442   floatobject, and complexobject did anyways. */
1443
1444#ifndef Py_LIMITED_API
1445PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
1446    PyObject *unicode           /* Unicode object */
1447    );
1448#endif
1449
1450/* --- File system encoding ---------------------------------------------- */
1451
1452/* ParseTuple converter: encode str objects to bytes using
1453   PyUnicode_EncodeFSDefault(); bytes objects are output as-is.

   arg is the object to convert; addr is the converter's output slot.
   NOTE(review): presumably addr is the address of a PyObject* as passed
   alongside the converter to the "O&" format -- confirm against the
   definition. */
1454
1455PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject *arg, void *addr);
1456
1457/* ParseTuple converter: decode bytes objects to unicode using
1458   PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is.

   arg is the object to convert; addr is the converter's output slot.
   NOTE(review): presumably addr is the address of a PyObject* as passed
   alongside the converter to the "O&" format -- confirm against the
   definition. */
1459
1460PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject *arg, void *addr);
1461
1462/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
1463   and the "surrogateescape" error handler.
1464
1465   If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1466   encoding.
1467
1468   Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
1469*/
1470
1471PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1472    const char *s               /* encoded string */
1473    );
1474
1475/* Decode a string using Py_FileSystemDefaultEncoding
1476   and the "surrogateescape" error handler.
1477
1478   If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1479   encoding.
1480*/
1481
1482PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1483    const char *s,               /* encoded string */
1484    Py_ssize_t size              /* size */
1485    );
1486
1487/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
1488   "surrogateescape" error handler, and return bytes.
1489
1490   If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1491   encoding.
1492*/
1493
1494PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1495    PyObject *unicode           /* Unicode object */
1496    );
1497
1498/* --- Methods & Slots ----------------------------------------------------
1499
1500   These are capable of handling Unicode objects and strings on input
1501   (we refer to them as strings in the descriptions) and return
1502   Unicode objects or integers as appropriate. */
1503
1504/* Concat two strings giving a new Unicode string. */
1505
1506PyAPI_FUNC(PyObject*) PyUnicode_Concat(
1507    PyObject *left,             /* Left string */
1508    PyObject *right             /* Right string */
1509    );
1510
1511/* Concat two strings and put the result in *pleft
1512   (sets *pleft to NULL on error) */
1513
1514PyAPI_FUNC(void) PyUnicode_Append(
1515    PyObject **pleft,           /* Pointer to left string */
1516    PyObject *right             /* Right string */
1517    );
1518
1519/* Concat two strings, put the result in *pleft and drop the right object
1520   (sets *pleft to NULL on error) */
1521
1522PyAPI_FUNC(void) PyUnicode_AppendAndDel(
1523    PyObject **pleft,           /* Pointer to left string */
1524    PyObject *right             /* Right string */
1525    );
1526
1527/* Split a string giving a list of Unicode strings.
1528
1529   If sep is NULL, splitting will be done at all whitespace
1530   substrings. Otherwise, splits occur at the given separator.
1531
1532   At most maxsplit splits will be done. If negative, no limit is set.
1533
1534   Separators are not included in the resulting list.
1535
1536*/
1537
1538PyAPI_FUNC(PyObject*) PyUnicode_Split(
1539    PyObject *s,                /* String to split */
1540    PyObject *sep,              /* String separator */
1541    Py_ssize_t maxsplit         /* Maxsplit count */
1542    );
1543
1544/* Ditto, but split at line breaks.
1545
1546   CRLF is considered to be one line break. Line breaks are not
1547   included in the resulting list. */
1548
1549PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
1550    PyObject *s,                /* String to split */
1551    int keepends                /* If true, line end markers are included */
1552    );
1553
1554/* Partition a string using a given separator. */
1555
1556PyAPI_FUNC(PyObject*) PyUnicode_Partition(
1557    PyObject *s,                /* String to partition */
1558    PyObject *sep               /* String separator */
1559    );
1560
1561/* Partition a string using a given separator, searching from the end of the
1562   string. */
1563
1564PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
1565    PyObject *s,                /* String to partition */
1566    PyObject *sep               /* String separator */
1567    );
1568
1569/* Split a string giving a list of Unicode strings.
1570
1571   If sep is NULL, splitting will be done at all whitespace
1572   substrings. Otherwise, splits occur at the given separator.
1573
1574   At most maxsplit splits will be done. But unlike PyUnicode_Split
1575   PyUnicode_RSplit splits from the end of the string. If negative,
1576   no limit is set.
1577
1578   Separators are not included in the resulting list.
1579
1580*/
1581
1582PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1583    PyObject *s,                /* String to split */
1584    PyObject *sep,              /* String separator */
1585    Py_ssize_t maxsplit         /* Maxsplit count */
1586    );
1587
1588/* Translate a string by applying a character mapping table to it and
1589   return the resulting Unicode object.
1590
1591   The mapping table must map Unicode ordinal integers to Unicode
1592   ordinal integers or None (causing deletion of the character).
1593
1594   Mapping tables may be dictionaries or sequences. Unmapped character
1595   ordinals (ones which cause a LookupError) are left untouched and
1596   are copied as-is.
1597
1598*/
1599
1600PyAPI_FUNC(PyObject *) PyUnicode_Translate(
1601    PyObject *str,              /* String */
1602    PyObject *table,            /* Translate table */
1603    const char *errors          /* error handling */
1604    );
1605
1606/* Join a sequence of strings using the given separator and return
1607   the resulting Unicode string. */
1608
1609PyAPI_FUNC(PyObject*) PyUnicode_Join(
1610    PyObject *separator,        /* Separator string */
1611    PyObject *seq               /* Sequence object */
1612    );
1613
1614/* Return 1 if substr matches str[start:end] at the given tail end, 0
1615   otherwise. */
1616
1617PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
1618    PyObject *str,              /* String */
1619    PyObject *substr,           /* Prefix or Suffix string */
1620    Py_ssize_t start,           /* Start index */
1621    Py_ssize_t end,             /* Stop index */
1622    int direction               /* Tail end: -1 prefix, +1 suffix */
1623    );
1624
1625/* Return the first position of substr in str[start:end] using the
1626   given search direction or -1 if not found. -2 is returned in case
1627   an error occurred and an exception is set. */
1628
1629PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
1630    PyObject *str,              /* String */
1631    PyObject *substr,           /* Substring to find */
1632    Py_ssize_t start,           /* Start index */
1633    Py_ssize_t end,             /* Stop index */
1634    int direction               /* Find direction: +1 forward, -1 backward */
1635    );
1636
1637/* Like PyUnicode_Find, but search for single character only. */
1638PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
1639    PyObject *str,              /* String */
1640    Py_UCS4 ch,                 /* Character to find */
1641    Py_ssize_t start,           /* Start index */
1642    Py_ssize_t end,             /* Stop index */
1643    int direction               /* Find direction: +1 forward, -1 backward */
1644    );
1645
1646/* Count the number of occurrences of substr in str[start:end]. */
1647
1648PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
1649    PyObject *str,              /* String */
1650    PyObject *substr,           /* Substring to count */
1651    Py_ssize_t start,           /* Start index */
1652    Py_ssize_t end              /* Stop index */
1653    );
1654
1655/* Replace at most maxcount occurrences of substr in str with replstr
1656   and return the resulting Unicode object. */
1657
1658PyAPI_FUNC(PyObject *) PyUnicode_Replace(
1659    PyObject *str,              /* String */
1660    PyObject *substr,           /* Substring to find */
1661    PyObject *replstr,          /* Replacement for each occurrence found */
1662    Py_ssize_t maxcount         /* Max. number of replacements to apply;
1663                                   -1 = all */
1664    );
1665
1666/* Compare two strings and return -1, 0 or 1 if left is less than, equal
1667   to, or greater than right, respectively. */
1668
1669PyAPI_FUNC(int) PyUnicode_Compare(
1670    PyObject *left,             /* Left string */
1671    PyObject *right             /* Right string */
1672    );
1673
1674PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
1675    PyObject *left,             /* Left string */
1676    const char *right           /* Right string, ASCII-encoded */
1677    );
1678
1679/* Rich compare two strings and return one of the following:
1680
1681   - NULL in case an exception was raised
1682   - Py_True or Py_False for successful comparisons
1683   - Py_NotImplemented in case the type combination is unknown
1684
1685   Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1686   case the conversion of the arguments to Unicode fails with a
1687   UnicodeDecodeError.
1688
1689   Possible values for op:
1690
1691     Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1692
1693*/
1694
1695PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
1696    PyObject *left,             /* Left string */
1697    PyObject *right,            /* Right string */
1698    int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
1699    );
1700
1701/* Apply an argument tuple or dictionary to a format string and return
1702   the resulting Unicode string. */
1703
1704PyAPI_FUNC(PyObject *) PyUnicode_Format(
1705    PyObject *format,           /* Format string */
1706    PyObject *args              /* Argument tuple or dictionary */
1707    );
1708
1709/* Check whether element is contained in container and return 1 or 0
1710   accordingly.
1711
1712   element has to coerce to a one-element Unicode string. -1 is
1713   returned in case of an error. */
1714
1715PyAPI_FUNC(int) PyUnicode_Contains(
1716    PyObject *container,        /* Container string */
1717    PyObject *element           /* Element string */
1718    );
1719
1720/* Check whether the argument s is a valid identifier. */
1721
1722PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
1723
1724#ifndef Py_LIMITED_API
1725/* Externally visible for str.strip(unicode); excluded from the limited API. */
1726PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1727    PyUnicodeObject *self,      /* presumably the string to strip -- confirm */
1728    int striptype,              /* NOTE(review): meaning not documented here */
1729    PyObject *sepobj            /* presumably the characters to strip -- confirm */
1730    );
1731#endif
1732
1733/* Using the current locale, insert the thousands grouping
1734   into the string pointed to by buffer.  For the argument descriptions,
1735   see Objects/stringlib/localeutil.h.  Not part of the limited API. */
1736
1737#ifndef Py_LIMITED_API
1738PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
1739                                                   Py_ssize_t n_buffer,
1740                                                   Py_UNICODE *digits,
1741                                                   Py_ssize_t n_digits,
1742                                                   Py_ssize_t min_width);
1743#endif
1744
1745/* Using explicitly passed-in values, insert the thousands grouping
1746   into the string pointed to by buffer.  For the argument descriptions,
1747   see Objects/stringlib/localeutil.h.  Not part of the limited API. */
1748#ifndef Py_LIMITED_API
1749PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
1750    int kind,
1751    void *buffer,
1752    Py_ssize_t n_buffer,
1753    void *digits,
1754    Py_ssize_t n_digits,
1755    Py_ssize_t min_width,
1756    const char *grouping,
1757    const char *thousands_sep);
1758#endif
1759/* === Characters Type APIs =============================================== */
1760
1761/* Helper array used by Py_UNICODE_ISSPACE(); not part of the limited API. */
1762
1763#ifndef Py_LIMITED_API  /* closed by the matching #endif after PyUnicode_AsUnicodeCopy */
1764PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1765
1766/* The _PyUnicode_Is* and _PyUnicode_To* functions should not be used
1767   directly.  Use the Py_UNICODE_IS* and Py_UNICODE_TO* macros instead.
1768
1769   These APIs are implemented in Objects/unicodectype.c.
1770
1771*/
1772
1773PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1774    Py_UCS4 ch       /* Unicode character to test */
1775    );
1776
1777PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1778    Py_UCS4 ch       /* Unicode character to test */
1779    );
1780
1781PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1782    Py_UCS4 ch       /* Unicode character to test */
1783    );
1784
1785PyAPI_FUNC(int) _PyUnicode_IsXidStart(
1786    Py_UCS4 ch       /* Unicode character to test */
1787    );
1788
1789PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
1790    Py_UCS4 ch       /* Unicode character to test */
1791    );
1792
1793PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1794    const Py_UCS4 ch         /* Unicode character to test (passed by value) */
1795    );
1796
1797PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1798    const Py_UCS4 ch         /* Unicode character to test (passed by value) */
1799    );
1800
1801PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1802    Py_UCS4 ch       /* Unicode character to convert */
1803    );
1804
1805PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1806    Py_UCS4 ch       /* Unicode character to convert */
1807    );
1808
1809PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1810    Py_UCS4 ch       /* Unicode character to convert */
1811    );
1812
1813PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1814    Py_UCS4 ch       /* Unicode character; result is an int */
1815    );
1816
1817PyAPI_FUNC(int) _PyUnicode_ToDigit(
1818    Py_UCS4 ch       /* Unicode character; result is an int */
1819    );
1820
1821PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1822    Py_UCS4 ch       /* Unicode character; result is a double */
1823    );
1824
1825PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1826    Py_UCS4 ch       /* Unicode character to test */
1827    );
1828
1829PyAPI_FUNC(int) _PyUnicode_IsDigit(
1830    Py_UCS4 ch       /* Unicode character to test */
1831    );
1832
1833PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1834    Py_UCS4 ch       /* Unicode character to test */
1835    );
1836
1837PyAPI_FUNC(int) _PyUnicode_IsPrintable(
1838    Py_UCS4 ch       /* Unicode character to test */
1839    );
1840
1841PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1842    Py_UCS4 ch       /* Unicode character to test */
1843    );
1844
1845PyAPI_FUNC(size_t) Py_UNICODE_strlen(
1846    const Py_UNICODE *u   /* read-only; NOTE(review): presumably like C strlen() -- confirm */
1847    );
1848
1849PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
1850    Py_UNICODE *s1,             /* writable; presumably the destination -- confirm */
1851    const Py_UNICODE *s2);      /* read-only input */
1852
1853PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
1854    Py_UNICODE *s1, const Py_UNICODE *s2);  /* s1 writable, s2 read-only */
1855
1856PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
1857    Py_UNICODE *s1,             /* writable; presumably the destination -- confirm */
1858    const Py_UNICODE *s2,       /* read-only input */
1859    size_t n);                  /* presumably a limit in Py_UNICODE units -- confirm */
1860
1861PyAPI_FUNC(int) Py_UNICODE_strcmp(
1862    const Py_UNICODE *s1,       /* read-only input */
1863    const Py_UNICODE *s2        /* read-only input */
1864    );
1865
1866PyAPI_FUNC(int) Py_UNICODE_strncmp(
1867    const Py_UNICODE *s1,       /* read-only input */
1868    const Py_UNICODE *s2,       /* read-only input */
1869    size_t n                    /* presumably a limit in Py_UNICODE units -- confirm */
1870    );
1871
1872PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
1873    const Py_UNICODE *s,        /* read-only input; note the non-const result pointer */
1874    Py_UNICODE c                /* character argument */
1875    );
1876
1877PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
1878    const Py_UNICODE *s,        /* read-only input; note the non-const result pointer */
1879    Py_UNICODE c                /* character argument */
1880    );
1881
1882PyAPI_FUNC(size_t) Py_UCS4_strlen(
1883    const Py_UCS4 *u      /* read-only; NOTE(review): presumably like C strlen() -- confirm */
1884    );
1885
1886PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcpy(
1887    Py_UCS4 *s1,                /* writable; presumably the destination -- confirm */
1888    const Py_UCS4 *s2);         /* read-only input */
1889
1890PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcat(
1891    Py_UCS4 *s1, const Py_UCS4 *s2);  /* s1 writable, s2 read-only */
1892
1893PyAPI_FUNC(Py_UCS4*) Py_UCS4_strncpy(
1894    Py_UCS4 *s1,                /* writable; presumably the destination -- confirm */
1895    const Py_UCS4 *s2,          /* read-only input */
1896    size_t n);                  /* presumably a limit in Py_UCS4 units -- confirm */
1897
1898PyAPI_FUNC(int) Py_UCS4_strcmp(
1899    const Py_UCS4 *s1,          /* read-only input */
1900    const Py_UCS4 *s2           /* read-only input */
1901    );
1902
1903PyAPI_FUNC(int) Py_UCS4_strncmp(
1904    const Py_UCS4 *s1,          /* read-only input */
1905    const Py_UCS4 *s2,          /* read-only input */
1906    size_t n                    /* presumably a limit in Py_UCS4 units -- confirm */
1907    );
1908
1909PyAPI_FUNC(Py_UCS4*) Py_UCS4_strchr(
1910    const Py_UCS4 *s,           /* read-only input; note the non-const result pointer */
1911    Py_UCS4 c                   /* character argument */
1912    );
1913
1914PyAPI_FUNC(Py_UCS4*) Py_UCS4_strrchr(
1915    const Py_UCS4 *s,           /* read-only input; note the non-const result pointer */
1916    Py_UCS4 c                   /* character argument */
1917    );
1918
1919/* Create a copy of a unicode string ending with a null character. Return NULL
1920   and raise a MemoryError exception on memory allocation failure; otherwise
1921   return a newly allocated buffer (use PyMem_Free() to free the buffer). */
1922
1923PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
1924    PyObject *unicode           /* Unicode string to copy */
1925    );
1926#endif /* Py_LIMITED_API */
1927
1928#ifdef __cplusplus
1929}
1930#endif
1931#endif /* !Py_UNICODEOBJECT_H */
1932