unicodeobject.h revision 37943769ef7594c9fb6a0c23ff4094376b49c3ea
1#ifndef Py_UNICODEOBJECT_H
2#define Py_UNICODEOBJECT_H
3
4#include <stdarg.h>
5
6/*
7
8Unicode implementation based on original code by Fredrik Lundh,
9modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
10Unicode Integration Proposal. (See
11http://www.egenix.com/files/python/unicode-proposal.txt).
12
13Copyright (c) Corporation for National Research Initiatives.
14
15
16 Original header:
17 --------------------------------------------------------------------
18
19 * Yet another Unicode string type for Python.  This type supports the
20 * 16-bit Basic Multilingual Plane (BMP) only.
21 *
22 * Written by Fredrik Lundh, January 1999.
23 *
24 * Copyright (c) 1999 by Secret Labs AB.
25 * Copyright (c) 1999 by Fredrik Lundh.
26 *
27 * fredrik@pythonware.com
28 * http://www.pythonware.com
29 *
30 * --------------------------------------------------------------------
31 * This Unicode String Type is
32 *
33 * Copyright (c) 1999 by Secret Labs AB
34 * Copyright (c) 1999 by Fredrik Lundh
35 *
36 * By obtaining, using, and/or copying this software and/or its
37 * associated documentation, you agree that you have read, understood,
38 * and will comply with the following terms and conditions:
39 *
40 * Permission to use, copy, modify, and distribute this software and its
41 * associated documentation for any purpose and without fee is hereby
42 * granted, provided that the above copyright notice appears in all
43 * copies, and that both that copyright notice and this permission notice
44 * appear in supporting documentation, and that the name of Secret Labs
45 * AB or the author not be used in advertising or publicity pertaining to
46 * distribution of the software without specific, written prior
47 * permission.
48 *
49 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
51 * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
52 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
53 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
54 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
55 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
56 * -------------------------------------------------------------------- */
57
58#include <ctype.h>
59
60/* === Internal API ======================================================= */
61
62/* --- Internal Unicode Format -------------------------------------------- */
63
64/* Python 3.x requires unicode */
65#define Py_USING_UNICODE
66
67#ifndef SIZEOF_WCHAR_T
68#error Must define SIZEOF_WCHAR_T
69#endif
70
71#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
72
73/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
74   Otherwise, Unicode strings are stored as UCS-2 (with limited support
75   for UTF-16) */
76
77#if Py_UNICODE_SIZE >= 4
78#define Py_UNICODE_WIDE
79#endif
80
81/* Set these flags if the platform has "wchar.h" and the
82   wchar_t type is a 16-bit unsigned type */
83/* #define HAVE_WCHAR_H */
84/* #define HAVE_USABLE_WCHAR_T */
85
/* Py_UNICODE used to be the native Unicode storage format (code unit) of
   Python; it represents a single Unicode element in the Unicode type.
   With PEP 393, Py_UNICODE is deprecated and is merely a typedef for
   wchar_t.  Neither name is part of the limited API. */

#if !defined(Py_LIMITED_API)
#define PY_UNICODE_TYPE wchar_t
typedef PY_UNICODE_TYPE Py_UNICODE;
#endif
95
/* If the compiler provides a wchar_t type we try to support it
   through the interface functions PyUnicode_FromWideChar(),
   PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */

/* A usable wchar_t implies that <wchar.h> is available. */
#if defined(HAVE_USABLE_WCHAR_T) && !defined(HAVE_WCHAR_H)
#  define HAVE_WCHAR_H
#endif

/* HAVE_MBCS is defined on MS_WINDOWS only. */
#ifdef MS_WINDOWS
#  define HAVE_MBCS
#endif

#if defined(HAVE_WCHAR_H)
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
#  if defined(_HAVE_BSDI)
#    include <time.h>
#  endif
#  include <wchar.h>
#endif
117
118/* Py_UCS4 and Py_UCS2 are typdefs for the respecitve
119   unicode representations. */
120#if SIZEOF_INT >= 4
121typedef unsigned int Py_UCS4;
122#elif SIZEOF_LONG >= 4
123typedef unsigned long Py_UCS4;
124#else
125#error "Could not find a proper typedef for Py_UCS4"
126#endif
127
128typedef unsigned short Py_UCS2;
129typedef unsigned char Py_UCS1;
130
131/* --- Internal Unicode Operations ---------------------------------------- */
132
133/* Since splitting on whitespace is an important use case, and
134   whitespace in most situations is solely ASCII whitespace, we
135   optimize for the common case by using a quick look-up table
136   _Py_ascii_whitespace (see below) with an inlined check.
137
138 */
139#ifndef Py_LIMITED_API
/* Values below 128 are looked up in the _Py_ascii_whitespace table,
   everything else is passed to _PyUnicode_IsWhitespace().  Note that ch
   is evaluated twice. */
#define Py_UNICODE_ISSPACE(ch) \
    ((ch) >= 128U ? _PyUnicode_IsWhitespace(ch) : _Py_ascii_whitespace[(ch)])

/* Case and line-break predicates: aliases for the _PyUnicode_Is*() helpers. */
#define Py_UNICODE_ISLOWER(ch)      _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch)      _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch)      _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch)  _PyUnicode_IsLinebreak(ch)

/* Case conversions. */
#define Py_UNICODE_TOLOWER(ch)      _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch)      _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch)      _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch)    _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch)      _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch)    _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch)  _PyUnicode_IsPrintable(ch)

/* Numeric conversions. */
#define Py_UNICODE_TODECIMAL(ch)    _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch)      _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch)    _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch)      _PyUnicode_IsAlpha(ch)

/* Alphanumeric means alpha, decimal, digit or numeric; ch may be
   evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch)          \
    (Py_UNICODE_ISALPHA(ch)   ||        \
     Py_UNICODE_ISDECIMAL(ch) ||        \
     Py_UNICODE_ISDIGIT(ch)   ||        \
     Py_UNICODE_ISNUMERIC(ch))
168
/* Copy `length` Py_UNICODE code units from `source` to `target` with
   Py_MEMCPY; the byte count passed is (length)*sizeof(Py_UNICODE). */
#define Py_UNICODE_COPY(target, source, length) \
    Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))

/* Store `value` into the first `length` code units of `target`.
   Each argument is evaluated exactly once (target, then value, then
   length), so arguments with side effects are safe.  The length is
   captured in a Py_ssize_t before the loop starts. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        Py_ssize_t n_ = (length);\
        for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\
    } while (0)
176
/* macros to work with surrogates

   The argument is parenthesized in every expansion, so any expression
   (e.g. one containing | or ?:) can be passed.  Note that ch is still
   evaluated twice; do not pass expressions with side effects. */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value:
   the low 10 bits of `high` become bits 10..19, the low 10 bits of `low`
   become bits 0..9, and 0x10000 is added to the result. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
185
/* Check if substring matches at given offset.  The offset must be
   valid, and the substring must not be empty.

   The first and the last code unit are compared before memcmp() is run
   over the whole substring.  Both arguments are accessed through their
   wstr member; the substring additionally through wstr_length. */

#define Py_UNICODE_MATCH(string, offset, substring)                         \
    (((string)->wstr[(offset)] == (substring)->wstr[0]) &&                  \
     (((string)->wstr + (offset))[(substring)->wstr_length-1] ==            \
      (substring)->wstr[(substring)->wstr_length-1]) &&                     \
     !memcmp((string)->wstr + (offset), (substring)->wstr,                  \
             (substring)->wstr_length*sizeof(Py_UNICODE)))
193
194#endif /* Py_LIMITED_API */
195
196#ifdef __cplusplus
197extern "C" {
198#endif
199
200/* --- Unicode Type ------------------------------------------------------- */
201
202#ifndef Py_LIMITED_API
203
204/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
205   structure. state.ascii and state.compact are set, and the data
206   immediately follow the structure. utf8_length and wstr_length can be found
207   in the length field; the utf8 pointer is equal to the data pointer. */
208typedef struct {
209    PyObject_HEAD
210    Py_ssize_t length;          /* Number of code points in the string */
211    Py_hash_t hash;             /* Hash value; -1 if not set */
212    struct {
213        /*
214           SSTATE_NOT_INTERNED (0)
215           SSTATE_INTERNED_MORTAL (1)
216           SSTATE_INTERNED_IMMORTAL (2)
217
218           If interned != SSTATE_NOT_INTERNED, the two references from the
219           dictionary to this object are *not* counted in ob_refcnt.
220         */
221        unsigned int interned:2;
222        /* Character size:
223
224           PyUnicode_WCHAR_KIND (0): wchar_t*
225           PyUnicode_1BYTE_KIND (1): Py_UCS1*
226           PyUnicode_2BYTE_KIND (2): Py_UCS2*
227           PyUnicode_4BYTE_KIND (3): Py_UCS4*
228         */
229        unsigned int kind:2;
230        /* Compact is with respect to the allocation scheme. Compact unicode
231           objects only require one memory block while non-compact objects use
232           one block for the PyUnicodeObject struct and another for its data
233           buffer. */
234        unsigned int compact:1;
235        /* Compact objects which are ASCII-only also have the state.compact
236           flag set, and use the PyASCIIObject struct. */
237        unsigned int ascii:1;
238        /* The ready flag indicates whether the object layout is initialized
239           completely. This means that this is either a compact object, or
240           the data pointer is filled out. The bit is redundant, and helps
241           to minimize the test in PyUnicode_IS_READY(). */
242        unsigned int ready:1;
243    } state;
244    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
245} PyASCIIObject;
246
247/* Non-ASCII strings allocated through PyUnicode_New use the
248   PyCompactUnicodeOject structure. state.compact is set, and the data
249   immediately follow the structure. */
250typedef struct {
251    PyASCIIObject _base;
252    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
253                                 * terminating \0. */
254    char *utf8;                 /* UTF-8 representation (null-terminated) */
255    Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
256                                 * surrogates count as two code points. */
257} PyCompactUnicodeObject;
258
259/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
260   PyUnicodeObject structure. The actual string data is initially in the wstr
261   block, and copied into the data block using PyUnicode_Ready. */
262typedef struct {
263    PyCompactUnicodeObject _base;
264    union {
265        void *any;
266        Py_UCS1 *latin1;
267        Py_UCS2 *ucs2;
268        Py_UCS4 *ucs4;
269    } data;                     /* Canonical, smallest-form Unicode buffer */
270} PyUnicodeObject;
271#endif
272
273PyAPI_DATA(PyTypeObject) PyUnicode_Type;
274PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
275
276#define PyUnicode_Check(op) \
277                 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
278#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
279
280/* Fast access macros */
281#ifndef Py_LIMITED_API
282
/* Return the length of the wstr representation.  If state.ascii is set
   the value is taken from the length field of PyASCIIObject, otherwise
   from the wstr_length field of PyCompactUnicodeObject.

   op is parenthesized before it is cast, so pointer expressions such as
   `base + 1` are handled correctly.  No type checks are performed and op
   is evaluated twice. */
#define PyUnicode_WSTR_LENGTH(op)                       \
    (((PyASCIIObject*)(op))->state.ascii ?              \
     ((PyASCIIObject*)(op))->length :                   \
     ((PyCompactUnicodeObject*)(op))->wstr_length)
287
/* Return the size of the deprecated Py_UNICODE representation in code
   units (surrogate pairs count as 2 units).
   When the wstr pointer is not set yet, PyUnicode_AsUnicode() is called
   first (its result is discarded) and the length is read afterwards.
   Use PyUnicode_GET_LENGTH() for the length in code points. */

#define PyUnicode_GET_SIZE(op)                          \
    (assert(PyUnicode_Check(op)),                       \
     !(((PyASCIIObject *)(op))->wstr) ?                 \
        ((void)PyUnicode_AsUnicode((PyObject *)(op)),   \
         PyUnicode_WSTR_LENGTH(op)) :                   \
        PyUnicode_WSTR_LENGTH(op))

/* Size of the Py_UNICODE representation in bytes. */
#define PyUnicode_GET_DATA_SIZE(op) \
    (Py_UNICODE_SIZE * PyUnicode_GET_SIZE(op))
302
/* Alias for PyUnicode_AsUnicode(): returns the cached wstr pointer if
   there is one and otherwise calls PyUnicode_AsUnicode(), which creates
   the wchar_t/Py_UNICODE representation on demand.  Using this macro is
   very inefficient now, try to port your code to use the new
   PyUnicode_*BYTE_DATA() macros or use PyUnicode_WRITE() and
   PyUnicode_READ(). */

#define PyUnicode_AS_UNICODE(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     !(((PyASCIIObject *)(op))->wstr) ?                 \
        PyUnicode_AsUnicode((PyObject *)(op)) :         \
        (((PyASCIIObject *)(op))->wstr))

/* Same pointer as PyUnicode_AS_UNICODE(), viewed as const char *. */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
315
316
/* --- Flexible String Representation Helper Macros (PEP 393) ------------- */

/* Values for PyUnicodeObject.state: */

/* Interning state. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2

/* Return the state.ascii flag of op.  op is parenthesized before the
   cast so that arbitrary pointer expressions can be passed.  No type
   checks are performed. */
#define PyUnicode_IS_COMPACT_ASCII(op) (((PyASCIIObject*)(op))->state.ascii)

/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and PyUnicode_Ready()
   has not been called yet.  */
#define PyUnicode_WCHAR_KIND 0

/* Return values of the PyUnicode_KIND() macro: */

#define PyUnicode_1BYTE_KIND 1
#define PyUnicode_2BYTE_KIND 2
#define PyUnicode_4BYTE_KIND 3
338
339
/* Return the number of bytes the string uses to represent a single
   character; this can be 1, 2 or 4 (computed as 1 << (kind - 1)).

   See also PyUnicode_KIND_SIZE(). */
#define PyUnicode_CHARACTER_SIZE(op) \
    (1 << (PyUnicode_KIND(op) - 1))

/* Return the pointer to the canonical representation cast to Py_UCS1,
   Py_UCS2 or Py_UCS4 for direct character access.
   No checks are performed, use PyUnicode_CHARACTER_SIZE or
   PyUnicode_KIND() before to ensure these will work correctly. */

#define PyUnicode_1BYTE_DATA(op)    ((Py_UCS1 *)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op)    ((Py_UCS2 *)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op)    ((Py_UCS4 *)PyUnicode_DATA(op))

/* Return the state.compact flag: non-zero if the string is compact, 0 if
   it is not.  No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    ((*(PyASCIIObject *)(op)).state.compact)

/* Return one of the PyUnicode_*_KIND values defined above.  Asserts that
   op is a unicode object and that it is ready. */
#define PyUnicode_KIND(op)                      \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     (*(PyASCIIObject *)(op)).state.kind)
366
/* Return a void pointer to the raw unicode buffer. */

/* Compact strings: the buffer starts right behind the object header,
   which is a PyASCIIObject if state.ascii is set and a
   PyCompactUnicodeObject otherwise. */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (!PyUnicode_IS_COMPACT_ASCII(op) ?                  \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)) :     \
     ((void*)((PyASCIIObject*)(op) + 1)))

/* Non-compact strings: the buffer is the data.any pointer, which is
   asserted to be set. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))

/* Dispatch on the compact flag after asserting that op is a unicode
   object. */
#define PyUnicode_DATA(op)                                          \
    (assert(PyUnicode_Check(op)),                                   \
     !PyUnicode_IS_COMPACT(op) ? _PyUnicode_NONCOMPACT_DATA(op) :   \
     _PyUnicode_COMPACT_DATA(op))
381
/* Convert a character index into a size in bytes for the given kind:
   the result is (index * char_size), where char_size is 2 ** (kind - 1),
   and it is computed with a left shift by (kind - 1).

   See also PyUnicode_CHARACTER_SIZE(). */
#define PyUnicode_KIND_SIZE(kind, index) \
    ((index) << ((kind) - 1))
387
/* In the access macros below, "kind" may be evaluated more than once.
   All other macro parameters are evaluated exactly once, so it is safe
   to put side effects into them (such as increasing the index). */

/* Write into the canonical representation.  This macro does not do any
   sanity checks and is intended for usage in loops; the caller should
   cache the kind and data pointers obtained from other macro calls.
   index is the index in the string (starts at 0) and value is the new
   code point value which should be written to that location.  The value
   is cast to the element type selected by kind; any kind other than the
   1-byte and 2-byte kinds is asserted to be PyUnicode_4BYTE_KIND. */
#define PyUnicode_WRITE(kind, data, index, value)                   \
    do {                                                            \
        switch ((kind)) {                                           \
        case PyUnicode_1BYTE_KIND:                                  \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value);        \
            break;                                                  \
        case PyUnicode_2BYTE_KIND:                                  \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value);        \
            break;                                                  \
        default:                                                    \
            assert((kind) == PyUnicode_4BYTE_KIND);                 \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value);        \
            break;                                                  \
        }                                                           \
    } while (0)
414
/* Read a code point from the string's canonical representation and
   return it as Py_UCS4.  kind selects the element type used to index
   data: Py_UCS1 for the 1-byte kind, Py_UCS2 for the 2-byte kind and
   Py_UCS4 for everything else.  No checks or ready calls are performed. */
#define PyUnicode_READ(kind, data, index)                   \
    ((Py_UCS4)                                              \
     ((kind) == PyUnicode_1BYTE_KIND ?                      \
          ((const Py_UCS1 *)(data))[(index)] :              \
      (kind) == PyUnicode_2BYTE_KIND ?                      \
          ((const Py_UCS2 *)(data))[(index)] :              \
          ((const Py_UCS4 *)(data))[(index)]))
426
/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
   calls PyUnicode_KIND() and might call it twice.  For single reads, use
   PyUnicode_READ_CHAR; for multiple consecutive reads callers should
   cache kind and use PyUnicode_READ instead.  Asserts that unicode is a
   ready unicode object; index is evaluated once. */
#define PyUnicode_READ_CHAR(unicode, index)                             \
    (assert(PyUnicode_Check(unicode)),                                  \
     assert(PyUnicode_IS_READY(unicode)),                               \
     (Py_UCS4)                                                          \
        (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ?            \
             ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] :  \
         PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ?            \
             ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] :  \
             ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)]))
442
/* Return the length field of the unicode string.  The caller has to make
   sure that the string has its canonical representation set before
   calling this macro (this is asserted).  Call PyUnicode_(FAST_)Ready to
   ensure that. */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     (*(PyASCIIObject *)(op)).length)
450
451
/* Fast check to determine whether an object is ready: returns the
   state.ready flag.  Equivalent to
   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any)

   op is parenthesized before the cast so that arbitrary pointer
   expressions can be passed.  No type checks are performed. */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
456
/* PyUnicode_READY() does less work than PyUnicode_Ready() in the best
   case: if the ready flag is already set it evaluates to 0 without a
   function call.  If the canonical representation is not yet set, it
   calls _PyUnicode_Ready().
   Returns 0 on success and -1 on errors. */
#define PyUnicode_READY(op)                             \
    (assert(PyUnicode_Check(op)),                       \
     (!PyUnicode_IS_READY(op) ?                         \
      _PyUnicode_Ready((PyObject *)(op)) : 0))
465
/* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
   than iterating over the string:

     - 0x7f     if state.ascii is set, or if the kind is 1 byte and the
                data pointer is identical to the utf8 pointer
     - 0xff     for any other 1-byte string
     - 0xffff   for 2-byte strings
     - 0x10ffff otherwise

   Asserts that op is ready. */
#define PyUnicode_MAX_CHAR_VALUE(op)                                        \
    (assert(PyUnicode_IS_READY(op)),                                        \
     (PyUnicode_IS_COMPACT_ASCII(op) ? 0x7f :                               \
      PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                          \
          (PyUnicode_DATA(op) == (((PyCompactUnicodeObject *)(op))->utf8) ? \
               (0x7fU) : (0xffU)) :                                         \
      PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                          \
          (0xffffU) : (0x10ffffU)))
479
480#endif
481
482/* --- Constants ---------------------------------------------------------- */
483
/* Replacement character used during decoding if the errors argument is
   set to "replace".  Note: U+FFFD is the official REPLACEMENT CHARACTER
   in Unicode 3.0.  The constant has type Py_UCS4. */

#define Py_UNICODE_REPLACEMENT_CHARACTER \
    ((Py_UCS4)0xFFFD)
490
491/* === Public API ========================================================= */
492
493/* --- Plain Py_UNICODE --------------------------------------------------- */
494
495/* With PEP 393, this is the recommended way to allocate a new unicode object.
496   This function will allocate the object and its buffer in a single memory
497   block.  Objects created using this function are not resizable. */
498#ifndef Py_LIMITED_API
499PyAPI_FUNC(PyObject*) PyUnicode_New(
500    Py_ssize_t size,            /* Number of code points in the new string */
501    Py_UCS4 maxchar             /* maximum code point value in the string */
502    );
503#endif
504
505/* Initializes the canonical string representation from a the deprecated
506   wstr/Py_UNICODE representation. This function is used to convert Unicode
507   objects which were created using the old API to the new flexible format
508   introduced with PEP 393.
509
510   Don't call this function directly, use the public PyUnicode_READY() macro
511   instead. */
512#ifndef Py_LIMITED_API
513PyAPI_FUNC(int) _PyUnicode_Ready(
514    PyObject *unicode           /* Unicode object */
515    );
516#endif
517
518/* Get a copy of a Unicode string. */
519PyAPI_FUNC(PyObject*) PyUnicode_Copy(
520    PyObject *unicode
521    );
522
523/* Copy character from one unicode object into another, this function performs
524   character conversion when necessary and falls back to memcpy if possible.
525
526   Fail if to is too small (smaller than how_many or smaller than
527   len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
528   kind(to), or if to has more than 1 reference.
529
530   Return the number of written character, or return -1 and raise an exception
531   on error.
532
533   Pseudo-code:
534
535       how_many = min(how_many, len(from) - from_start)
536       to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
537       return how_many
538
539   Note: The function doesn't write a terminating null character.
540   */
541#ifndef Py_LIMITED_API
542PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
543    PyObject *to,
544    Py_ssize_t to_start,
545    PyObject *from,
546    Py_ssize_t from_start,
547    Py_ssize_t how_many
548    );
549#endif
550
551/* Create a Unicode Object from the Py_UNICODE buffer u of the given
552   size.
553
554   u may be NULL which causes the contents to be undefined. It is the
555   user's responsibility to fill in the needed data afterwards. Note
556   that modifying the Unicode object contents after construction is
557   only allowed if u was set to NULL.
558
559   The buffer is copied into the new object. */
560
561#ifndef Py_LIMITED_API
562PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
563    const Py_UNICODE *u,        /* Unicode buffer */
564    Py_ssize_t size             /* size of buffer */
565    );
566#endif
567
568/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
569PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
570    const char *u,             /* UTF-8 encoded string */
571    Py_ssize_t size            /* size of buffer */
572    );
573
574/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
575   UTF-8 encoded bytes.  The size is determined with strlen(). */
576PyAPI_FUNC(PyObject*) PyUnicode_FromString(
577    const char *u              /* UTF-8 encoded string */
578    );
579
580#ifndef Py_LIMITED_API
581PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
582    int kind,
583    const void *buffer,
584    Py_ssize_t size);
585#endif
586
587PyAPI_FUNC(PyObject*) PyUnicode_Substring(
588    PyObject *str,
589    Py_ssize_t start,
590    Py_ssize_t end);
591
592/* Copy the string into a UCS4 buffer including the null character is copy_null
593   is set. Return NULL and raise an exception on error. Raise a ValueError if
594   the buffer is smaller than the string. Return buffer on success.
595
596   buflen is the length of the buffer in (Py_UCS4) characters. */
597PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
598    PyObject *unicode,
599    Py_UCS4* buffer,
600    Py_ssize_t buflen,
601    int copy_null);
602
603/* Copy the string into a UCS4 buffer. A new buffer is allocated using
604 * PyMem_Malloc; if this fails, NULL is returned with a memory error
605   exception set. */
606PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
607
608/* Return a read-only pointer to the Unicode object's internal
609   Py_UNICODE buffer.
610   If the wchar_t/Py_UNICODE representation is not yet available, this
611   function will calculate it. */
612
613#ifndef Py_LIMITED_API
614PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
615    PyObject *unicode           /* Unicode object */
616    );
617#endif
618
619/* Return a read-only pointer to the Unicode object's internal
620   Py_UNICODE buffer and save the length at size.
621   If the wchar_t/Py_UNICODE representation is not yet available, this
622   function will calculate it. */
623
624#ifndef Py_LIMITED_API
625PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
626    PyObject *unicode,          /* Unicode object */
627    Py_ssize_t *size            /* location where to save the length */
628    );
629#endif
630
631/* Get the length of the Unicode object. */
632
633PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
634    PyObject *unicode
635);
636
637/* Get the number of Py_UNICODE units in the
638   string representation. */
639
640PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
641    PyObject *unicode           /* Unicode object */
642    );
643
644/* Read a character from the string. */
645
646PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
647    PyObject *unicode,
648    Py_ssize_t index
649    );
650
651/* Write a character to the string. The string must have been created through
652   PyUnicode_New, must not be shared, and must not have been hashed yet.
653
654   Return 0 on success, -1 on error. */
655
656PyAPI_FUNC(int) PyUnicode_WriteChar(
657    PyObject *unicode,
658    Py_ssize_t index,
659    Py_UCS4 character
660    );
661
662#ifndef Py_LIMITED_API
663/* Get the maximum ordinal for a Unicode character. */
664PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
665#endif
666
667/* Resize an already allocated Unicode object to the new size length.
668
669   *unicode is modified to point to the new (resized) object and 0
670   returned on success.
671
672   This API may only be called by the function which also called the
673   Unicode constructor. The refcount on the object must be 1. Otherwise,
674   an error is returned.
675
676   Error handling is implemented as follows: an exception is set, -1
677   is returned and *unicode left untouched.
678
679*/
680
681PyAPI_FUNC(int) PyUnicode_Resize(
682    PyObject **unicode,         /* Pointer to the Unicode object */
683    Py_ssize_t length           /* New length */
684    );
685
686/* Coerce obj to an Unicode object and return a reference with
687   *incremented* refcount.
688
689   Coercion is done in the following way:
690
691   1. bytes, bytearray and other char buffer compatible objects are decoded
692      under the assumptions that they contain data using the UTF-8
693      encoding. Decoding is done in "strict" mode.
694
695   2. All other objects (including Unicode objects) raise an
696      exception.
697
698   The API returns NULL in case of an error. The caller is responsible
699   for decref'ing the returned objects.
700
701*/
702
703PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
704    register PyObject *obj,     /* Object */
705    const char *encoding,       /* encoding */
706    const char *errors          /* error handling */
707    );
708
709/* Coerce obj to an Unicode object and return a reference with
710   *incremented* refcount.
711
712   Unicode objects are passed back as-is (subclasses are converted to
713   true Unicode objects), all other objects are delegated to
714   PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
715   using UTF-8 encoding as basis for decoding the object.
716
717   The API returns NULL in case of an error. The caller is responsible
718   for decref'ing the returned objects.
719
720*/
721
722PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
723    register PyObject *obj      /* Object */
724    );
725
726PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
727    const char *format,   /* ASCII-encoded string  */
728    va_list vargs
729    );
730PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
731    const char *format,   /* ASCII-encoded string  */
732    ...
733    );
734
735#ifndef Py_LIMITED_API
736/* Format the object based on the format_spec, as defined in PEP 3101
737   (Advanced String Formatting). */
738PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
739                                                 PyObject *format_spec,
740                                                 Py_ssize_t start,
741                                                 Py_ssize_t end);
742#endif
743
744PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
745PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
746PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
747    const char *u              /* UTF-8 encoded string */
748    );
749#ifndef Py_LIMITED_API
750PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
751#endif
752
753/* Use only if you know it's a string */
754#define PyUnicode_CHECK_INTERNED(op) \
755    (((PyASCIIObject *)(op))->state.interned)
756
757/* --- wchar_t support for platforms which support it --------------------- */
758
759#ifdef HAVE_WCHAR_H
760
761/* Create a Unicode Object from the wchar_t buffer w of the given
762   size.
763
764   The buffer is copied into the new object. */
765
766PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
767    register const wchar_t *w,  /* wchar_t buffer */
768    Py_ssize_t size             /* size of buffer */
769    );
770
771/* Copies the Unicode Object contents into the wchar_t buffer w.  At
772   most size wchar_t characters are copied.
773
774   Note that the resulting wchar_t string may or may not be
775   0-terminated.  It is the responsibility of the caller to make sure
776   that the wchar_t string is 0-terminated in case this is required by
777   the application.
778
779   Returns the number of wchar_t characters copied (excluding a
780   possibly trailing 0-termination character) or -1 in case of an
781   error. */
782
783PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
784    PyObject *unicode,          /* Unicode object */
785    register wchar_t *w,        /* wchar_t buffer */
786    Py_ssize_t size             /* size of buffer */
787    );
788
789/* Convert the Unicode object to a wide character string. The output string
790   always ends with a nul character. If size is not NULL, write the number of
791   wide characters (excluding the null character) into *size.
792
793   Returns a buffer allocated by PyMem_Alloc() (use PyMem_Free() to free it)
794   on success. On error, returns NULL, *size is undefined and raises a
795   MemoryError. */
796
797PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
798    PyObject *unicode,          /* Unicode object */
799    Py_ssize_t *size            /* number of characters of the result */
800    );
801
802#ifndef Py_LIMITED_API
803PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
804#endif
805
806#endif
807
808/* --- Unicode ordinals --------------------------------------------------- */
809
810/* Create a Unicode Object from the given Unicode code point ordinal.
811
812   The ordinal must be in range(0x10000) on narrow Python builds
813   (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
814   raised in case it is not.
815
816*/
817
818PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
819
820/* --- Free-list management ----------------------------------------------- */
821
822/* Clear the free list used by the Unicode implementation.
823
824   This can be used to release memory used for objects on the free
825   list back to the Python memory allocator.
826
827*/
828
829PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
830
831/* === Builtin Codecs =====================================================
832
833   Many of these APIs take two arguments encoding and errors. These
834   parameters encoding and errors have the same semantics as the ones
835   of the builtin str() API.
836
837   Setting encoding to NULL causes the default encoding (UTF-8) to be used.
838
839   Error handling is set by errors which may also be set to NULL
840   meaning to use the default handling defined for the codec. Default
841   error handling for all builtin codecs is "strict" (ValueErrors are
842   raised).
843
844   The codecs all use a similar interface. Only deviations from the
845   generic interface are documented.
846
847*/
848
849/* --- Manage the default encoding ---------------------------------------- */
850
851/* Returns a pointer to the default encoding (UTF-8) of the
852   Unicode object unicode and the size of the encoded representation
853   in bytes stored in *size.
854
855   In case of an error, *size is not set.
856
857   This function caches the UTF-8 encoded string in the unicodeobject
858   and subsequent calls will return the same string.  The memory is released
859   when the unicodeobject is deallocated.
860
861   _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
862   support the previous internal function with the same behaviour.
863
864   *** This API is for interpreter INTERNAL USE ONLY and will likely
865   *** be removed or changed in the future.
866
867   *** If you need to access the Unicode object as UTF-8 bytes string,
868   *** please use PyUnicode_AsUTF8String() instead.
869*/
870
871#ifndef Py_LIMITED_API
872PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
873    PyObject *unicode,          /* Unicode object */
874    Py_ssize_t *size);          /* receives the size of the UTF-8 data in bytes */
875#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
876#endif
877
878/* Returns a pointer to the default encoding (UTF-8) of the
879   Unicode object unicode.
880
881   Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
882   in the unicodeobject.
883
884   _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
885   support the previous internal function with the same behaviour.
886
887   Use of this API is DEPRECATED since no size information can be
888   extracted from the returned data.
889
890   *** This API is for interpreter INTERNAL USE ONLY and will likely
891   *** be removed or changed for Python 3.1.
892
893   *** If you need to access the Unicode object as UTF-8 bytes string,
894   *** please use PyUnicode_AsUTF8String() instead.
895
896*/
897
898#ifndef Py_LIMITED_API
899PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
900#define _PyUnicode_AsString PyUnicode_AsUTF8
901#endif
902
903/* Returns "utf-8".  */
904
905PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
906
907/* --- Generic Codecs ----------------------------------------------------- */
908
909/* Create a Unicode object by decoding the encoded string s of the
910   given size. */
911
912PyAPI_FUNC(PyObject*) PyUnicode_Decode(
913    const char *s,              /* encoded string */
914    Py_ssize_t size,            /* size of buffer */
915    const char *encoding,       /* encoding */
916    const char *errors          /* error handling */
917    );
918
919/* Decode a Unicode object unicode and return the result as Python
920   object. */
921
922PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
923    PyObject *unicode,          /* Unicode object */
924    const char *encoding,       /* encoding */
925    const char *errors          /* error handling */
926    );
927
928/* Decode a Unicode object unicode and return the result as Unicode
929   object. */
930
931PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
932    PyObject *unicode,          /* Unicode object */
933    const char *encoding,       /* encoding */
934    const char *errors          /* error handling */
935    );
936
937/* Encodes a Py_UNICODE buffer of the given size and returns a
938   Python string object. */
939
940#ifndef Py_LIMITED_API
941PyAPI_FUNC(PyObject*) PyUnicode_Encode(
942    const Py_UNICODE *s,        /* Unicode char buffer */
943    Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
944    const char *encoding,       /* encoding */
945    const char *errors          /* error handling */
946    );
947#endif
948
949/* Encodes a Unicode object and returns the result as Python
950   object. */
951
952PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
953    PyObject *unicode,          /* Unicode object */
954    const char *encoding,       /* encoding */
955    const char *errors          /* error handling */
956    );
957
958/* Encodes a Unicode object and returns the result as Python string
959   object. */
960
961PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
962    PyObject *unicode,          /* Unicode object */
963    const char *encoding,       /* encoding */
964    const char *errors          /* error handling */
965    );
966
967/* Encodes a Unicode object and returns the result as Unicode
968   object. */
969
970PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
971    PyObject *unicode,          /* Unicode object */
972    const char *encoding,       /* encoding */
973    const char *errors          /* error handling */
974    );
975
976/* Build an encoding map. */
977
978PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
979    PyObject* string            /* 256 character map */
980   );
981
982/* --- UTF-7 Codecs ------------------------------------------------------- */
983
984PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
985    const char *string,         /* UTF-7 encoded string */
986    Py_ssize_t length,          /* size of string */
987    const char *errors          /* error handling */
988    );
989
990PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
991    const char *string,         /* UTF-7 encoded string */
992    Py_ssize_t length,          /* size of string */
993    const char *errors,         /* error handling */
994    Py_ssize_t *consumed        /* bytes consumed */
995    );
996
997#ifndef Py_LIMITED_API
998PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
999    const Py_UNICODE *data,     /* Unicode char buffer */
1000    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1001    int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
1002    int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
1003    const char *errors          /* error handling */
1004    );
1005#endif
1006
1007/* --- UTF-8 Codecs ------------------------------------------------------- */
1008
1009PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
1010    const char *string,         /* UTF-8 encoded string */
1011    Py_ssize_t length,          /* size of string */
1012    const char *errors          /* error handling */
1013    );
1014
1015PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
1016    const char *string,         /* UTF-8 encoded string */
1017    Py_ssize_t length,          /* size of string */
1018    const char *errors,         /* error handling */
1019    Py_ssize_t *consumed        /* bytes consumed */
1020    );
1021
1022PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
1023    PyObject *unicode           /* Unicode object */
1024    );
1025
1026#ifndef Py_LIMITED_API
1027PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
1028    PyObject *unicode,          /* Unicode object */
1029    const char *errors);        /* error handling */
1030
1031PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
1032    const Py_UNICODE *data,     /* Unicode char buffer */
1033    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1034    const char *errors          /* error handling */
1035    );
1036#endif
1037
1038/* --- UTF-32 Codecs ------------------------------------------------------ */
1039
1040/* Decodes length bytes from a UTF-32 encoded buffer string and returns
1041   the corresponding Unicode object.
1042
1043   errors (if non-NULL) defines the error handling. It defaults
1044   to "strict".
1045
1046   If byteorder is non-NULL, the decoder starts decoding using the
1047   given byte order:
1048
1049    *byteorder == -1: little endian
1050    *byteorder == 0:  native order
1051    *byteorder == 1:  big endian
1052
1053   In native mode, the first four bytes of the stream are checked for a
1054   BOM mark. If found, the BOM mark is analysed, the byte order
1055   adjusted and the BOM skipped.  In the other modes, no BOM mark
1056   interpretation is done. After completion, *byteorder is set to the
1057   current byte order at the end of input data.
1058
1059   If byteorder is NULL, the codec starts in native order mode.
1060
1061*/
1062
1063PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
1064    const char *string,         /* UTF-32 encoded string */
1065    Py_ssize_t length,          /* size of string */
1066    const char *errors,         /* error handling */
1067    int *byteorder              /* pointer to byteorder to use
1068                                   0=native;-1=LE,1=BE; updated on
1069                                   exit */
1070    );
1071
1072PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
1073    const char *string,         /* UTF-32 encoded string */
1074    Py_ssize_t length,          /* size of string */
1075    const char *errors,         /* error handling */
1076    int *byteorder,             /* pointer to byteorder to use
1077                                   0=native;-1=LE,1=BE; updated on
1078                                   exit */
1079    Py_ssize_t *consumed        /* bytes consumed */
1080    );
1081
1082/* Returns a Python string using the UTF-32 encoding in native byte
1083   order. The string always starts with a BOM mark.  */
1084
1085PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
1086    PyObject *unicode           /* Unicode object */
1087    );
1088
1089/* Returns a Python string object holding the UTF-32 encoded value of
1090   the Unicode data.
1091
1092   The output is written according to the following
1093   byte order:
1094
1095   byteorder == -1: little endian
1096   byteorder == 0:  native byte order (writes a BOM mark)
1097   byteorder == 1:  big endian
1098
1099   If byteorder is 0, the output string will always start with the
1100   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1101   prepended.
1102
1103*/
1104
1105#ifndef Py_LIMITED_API
1106PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
1107    const Py_UNICODE *data,     /* Unicode char buffer */
1108    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1109    const char *errors,         /* error handling */
1110    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1111    );
1112#endif
1113
1114/* --- UTF-16 Codecs ------------------------------------------------------ */
1115
1116/* Decodes length bytes from a UTF-16 encoded buffer string and returns
1117   the corresponding Unicode object.
1118
1119   errors (if non-NULL) defines the error handling. It defaults
1120   to "strict".
1121
1122   If byteorder is non-NULL, the decoder starts decoding using the
1123   given byte order:
1124
1125    *byteorder == -1: little endian
1126    *byteorder == 0:  native order
1127    *byteorder == 1:  big endian
1128
1129   In native mode, the first two bytes of the stream are checked for a
1130   BOM mark. If found, the BOM mark is analysed, the byte order
1131   adjusted and the BOM skipped.  In the other modes, no BOM mark
1132   interpretation is done. After completion, *byteorder is set to the
1133   current byte order at the end of input data.
1134
1135   If byteorder is NULL, the codec starts in native order mode.
1136
1137*/
1138
1139PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
1140    const char *string,         /* UTF-16 encoded string */
1141    Py_ssize_t length,          /* size of string */
1142    const char *errors,         /* error handling */
1143    int *byteorder              /* pointer to byteorder to use
1144                                   0=native;-1=LE,1=BE; updated on
1145                                   exit */
1146    );
1147
1148PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
1149    const char *string,         /* UTF-16 encoded string */
1150    Py_ssize_t length,          /* size of string */
1151    const char *errors,         /* error handling */
1152    int *byteorder,             /* pointer to byteorder to use
1153                                   0=native;-1=LE,1=BE; updated on
1154                                   exit */
1155    Py_ssize_t *consumed        /* bytes consumed */
1156    );
1157
1158/* Returns a Python string using the UTF-16 encoding in native byte
1159   order. The string always starts with a BOM mark.  */
1160
1161PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
1162    PyObject *unicode           /* Unicode object */
1163    );
1164
1165/* Returns a Python string object holding the UTF-16 encoded value of
1166   the Unicode data.
1167
1168   The output is written according to the following
1169   byte order:
1170
1171   byteorder == -1: little endian
1172   byteorder == 0:  native byte order (writes a BOM mark)
1173   byteorder == 1:  big endian
1174
1175   If byteorder is 0, the output string will always start with the
1176   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1177   prepended.
1178
1179   Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
1180   UCS-2. This trick makes it possible to add full UTF-16 capabilities
1181   at a later point without compromising the APIs.
1182
1183*/
1184
1185#ifndef Py_LIMITED_API
1186PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
1187    const Py_UNICODE *data,     /* Unicode char buffer */
1188    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1189    const char *errors,         /* error handling */
1190    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1191    );
1192#endif
1193
1194/* --- Unicode-Escape Codecs ---------------------------------------------- */
1195
1196PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
1197    const char *string,         /* Unicode-Escape encoded string */
1198    Py_ssize_t length,          /* size of string */
1199    const char *errors          /* error handling */
1200    );
1201
1202PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
1203    PyObject *unicode           /* Unicode object */
1204    );
1205
1206#ifndef Py_LIMITED_API
1207PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
1208    const Py_UNICODE *data,     /* Unicode char buffer */
1209    Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1210    );
1211#endif
1212
1213/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
1214
1215PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
1216    const char *string,         /* Raw-Unicode-Escape encoded string */
1217    Py_ssize_t length,          /* size of string */
1218    const char *errors          /* error handling */
1219    );
1220
1221PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
1222    PyObject *unicode           /* Unicode object */
1223    );
1224
1225#ifndef Py_LIMITED_API
1226PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
1227    const Py_UNICODE *data,     /* Unicode char buffer */
1228    Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1229    );
1230#endif
1231
1232/* --- Unicode Internal Codec ---------------------------------------------
1233
1234    Only for internal use in _codecsmodule.c */
1235
1236#ifndef Py_LIMITED_API
1237PyObject *_PyUnicode_DecodeUnicodeInternal(
1238    const char *string,         /* encoded string */
1239    Py_ssize_t length,          /* size of string */
1240    const char *errors          /* error handling */
1241    );                          /* NOTE(review): declared without PyAPI_FUNC, unlike the sibling declarations; confirm this is intended */
1242#endif
1243
1244/* --- Latin-1 Codecs -----------------------------------------------------
1245
1246   Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1247
1248*/
1249
1250PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
1251    const char *string,         /* Latin-1 encoded string */
1252    Py_ssize_t length,          /* size of string */
1253    const char *errors          /* error handling */
1254    );
1255
1256PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
1257    PyObject *unicode           /* Unicode object */
1258    );
1259
1260#ifndef Py_LIMITED_API
1261PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
1262    PyObject* unicode,          /* Unicode object */
1263    const char* errors);        /* error handling */
1264
1265PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
1266    const Py_UNICODE *data,     /* Unicode char buffer */
1267    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1268    const char *errors          /* error handling */
1269    );
1270#endif
1271
1272/* --- ASCII Codecs -------------------------------------------------------
1273
1274   Only 7-bit ASCII data is accepted. All other codes generate errors.
1275
1276*/
1277
1278PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
1279    const char *string,         /* ASCII encoded string */
1280    Py_ssize_t length,          /* size of string */
1281    const char *errors          /* error handling */
1282    );
1283
1284PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
1285    PyObject *unicode           /* Unicode object */
1286    );
1287
1288#ifndef Py_LIMITED_API
1289PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
1290    PyObject* unicode,          /* Unicode object */
1291    const char* errors);        /* error handling */
1292
1293PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
1294    const Py_UNICODE *data,     /* Unicode char buffer */
1295    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1296    const char *errors          /* error handling */
1297    );
1298#endif
1299
1300/* --- Character Map Codecs -----------------------------------------------
1301
1302   This codec uses mappings to encode and decode characters.
1303
1304   Decoding mappings must map single string characters to single
1305   Unicode characters, integers (which are then interpreted as Unicode
1306   ordinals) or None (meaning "undefined mapping" and causing an
1307   error).
1308
1309   Encoding mappings must map single Unicode characters to single
1310   string characters, integers (which are then interpreted as Latin-1
1311   ordinals) or None (meaning "undefined mapping" and causing an
1312   error).
1313
1314   If a character lookup fails with a LookupError, the character is
1315   copied as-is meaning that its ordinal value will be interpreted as
1316   Unicode or Latin-1 ordinal resp. Because of this, mappings only need
1317   to contain those mappings which map characters to different code
1318   points.
1319
1320*/
1321
1322PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
1323    const char *string,         /* Encoded string */
1324    Py_ssize_t length,          /* size of string */
1325    PyObject *mapping,          /* character mapping
1326                                   (char ordinal -> unicode ordinal) */
1327    const char *errors          /* error handling */
1328    );
1329
1330PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
1331    PyObject *unicode,          /* Unicode object */
1332    PyObject *mapping           /* character mapping
1333                                   (unicode ordinal -> char ordinal) */
1334    );
1335
1336#ifndef Py_LIMITED_API
1337PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
1338    const Py_UNICODE *data,     /* Unicode char buffer */
1339    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1340    PyObject *mapping,          /* character mapping
1341                                   (unicode ordinal -> char ordinal) */
1342    const char *errors          /* error handling */
1343    );
1344#endif
1345
1346/* Translate a Py_UNICODE buffer of the given length by applying a
1347   character mapping table to it and return the resulting Unicode
1348   object.
1349
1350   The mapping table must map Unicode ordinal integers to Unicode
1351   ordinal integers or None (causing deletion of the character).
1352
1353   Mapping tables may be dictionaries or sequences. Unmapped character
1354   ordinals (ones which cause a LookupError) are left untouched and
1355   are copied as-is.
1356
1357*/
1358
1359#ifndef Py_LIMITED_API
1360PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
1361    const Py_UNICODE *data,     /* Unicode char buffer */
1362    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1363    PyObject *table,            /* Translate table */
1364    const char *errors          /* error handling */
1365    );
1366#endif
1367
1368#ifdef HAVE_MBCS
1369
1370/* --- MBCS codecs for Windows -------------------------------------------- */
1371
1372PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
1373    const char *string,         /* MBCS encoded string */
1374    Py_ssize_t length,              /* size of string */
1375    const char *errors          /* error handling */
1376    );
1377
1378PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1379    const char *string,         /* MBCS encoded string */
1380    Py_ssize_t length,          /* size of string */
1381    const char *errors,         /* error handling */
1382    Py_ssize_t *consumed        /* bytes consumed */
1383    );
1384
1385PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
1386    PyObject *unicode           /* Unicode object */
1387    );
1388
1389#ifndef Py_LIMITED_API
1390PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
1391    const Py_UNICODE *data,     /* Unicode char buffer */
1392    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1393    const char *errors          /* error handling */
1394    );
1395#endif
1396
1397#endif /* HAVE_MBCS */
1398
1399/* --- Decimal Encoder ---------------------------------------------------- */
1400
1401/* Takes a Unicode string holding a decimal value and writes it into
1402   an output buffer using standard ASCII digit codes.
1403
1404   The output buffer has to provide at least length+1 bytes of storage
1405   area. The output string is 0-terminated.
1406
1407   The encoder converts whitespace to ' ', decimal characters to their
1408   corresponding ASCII digit and all other Latin-1 characters except
1409   \0 as-is. Characters outside this range (Unicode ordinals 1-255)
1410   are treated as errors. This includes embedded NUL characters.
1411
1412   Error handling is defined by the errors argument:
1413
1414      NULL or "strict": raise a ValueError
1415      "ignore": ignore the wrong characters (these are not copied to the
1416                output buffer)
1417      "replace": replaces illegal characters with '?'
1418
1419   Returns 0 on success, -1 on failure.
1420
1421*/
1422
1423#ifndef Py_LIMITED_API
1424PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
1425    Py_UNICODE *s,              /* Unicode buffer */
1426    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1427    char *output,               /* Output buffer; must have size >= length+1 */
1428    const char *errors          /* error handling */
1429    );
1430#endif
1431
1432/* Transforms code points that have decimal digit property to the
1433   corresponding ASCII digit code points.
1434
1435   Returns a new Unicode string on success, NULL on failure.
1436*/
1437
1438#ifndef Py_LIMITED_API
1439PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
1440    Py_UNICODE *s,              /* Unicode buffer */
1441    Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
1442    );
1443#endif
1444
1445/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyUnicodeObject
1446   as argument instead of a raw buffer and length.  This function additionally
1447   transforms spaces to ASCII because this is what the callers in longobject,
1448   floatobject, and complexobject did anyways. */
1449
1450#ifndef Py_LIMITED_API
1451PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
1452    PyObject *unicode           /* Unicode object */
1453    );
1454#endif
1455
1456/* --- File system encoding ---------------------------------------------- */
1457
1458/* ParseTuple converter: encode str objects to bytes using
1459   PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
1460
1461PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1462
1463/* ParseTuple converter: decode bytes objects to unicode using
1464   PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
1465
1466PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1467
1468/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
1469   and the "surrogateescape" error handler.
1470
1471   If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1472   encoding.
1473
1474   Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
1475*/
1476
1477PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1478    const char *s               /* encoded string */
1479    );
1480
1481/* Decode a string using Py_FileSystemDefaultEncoding
1482   and the "surrogateescape" error handler.
1483
1484   If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1485   encoding.
1486*/
1487
1488PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1489    const char *s,               /* encoded string */
1490    Py_ssize_t size              /* size */
1491    );
1492
1493/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
1494   "surrogateescape" error handler, and return bytes.
1495
1496   If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1497   encoding.
1498*/
1499
1500PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1501    PyObject *unicode           /* Unicode object */
1502    );
1503
1504/* --- Methods & Slots ----------------------------------------------------
1505
1506   These are capable of handling Unicode objects and strings on input
1507   (we refer to them as strings in the descriptions) and return
1508   Unicode objects or integers as appropriate. */
1509
1510/* Concat two strings giving a new Unicode string. */
1511
1512PyAPI_FUNC(PyObject*) PyUnicode_Concat(
1513    PyObject *left,             /* Left string */
1514    PyObject *right             /* Right string */
1515    );
1516
1517/* Concat two strings and put the result in *pleft
1518   (sets *pleft to NULL on error) */
1519
1520PyAPI_FUNC(void) PyUnicode_Append(
1521    PyObject **pleft,           /* Pointer to left string */
1522    PyObject *right             /* Right string */
1523    );
1524
1525/* Concat two strings, put the result in *pleft and drop the right object
1526   (sets *pleft to NULL on error) */
1527
1528PyAPI_FUNC(void) PyUnicode_AppendAndDel(
1529    PyObject **pleft,           /* Pointer to left string */
1530    PyObject *right             /* Right string */
1531    );
1532
1533/* Split a string giving a list of Unicode strings.
1534
1535   If sep is NULL, splitting will be done at all whitespace
1536   substrings. Otherwise, splits occur at the given separator.
1537
1538   At most maxsplit splits will be done. If negative, no limit is set.
1539
1540   Separators are not included in the resulting list.
1541
1542*/
1543
1544PyAPI_FUNC(PyObject*) PyUnicode_Split(
1545    PyObject *s,                /* String to split */
1546    PyObject *sep,              /* String separator */
1547    Py_ssize_t maxsplit         /* Maxsplit count */
1548    );
1549
1550/* Ditto, but split at line breaks.
1551
1552   CRLF is considered to be one line break. Line breaks are not
1553   included in the resulting list unless keepends is true. */
1554
1555PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
1556    PyObject *s,                /* String to split */
1557    int keepends                /* If true, line end markers are included */
1558    );
1559
1560/* Partition a string using a given separator. */
1561
1562PyAPI_FUNC(PyObject*) PyUnicode_Partition(
1563    PyObject *s,                /* String to partition */
1564    PyObject *sep               /* String separator */
1565    );
1566
1567/* Partition a string using a given separator, searching from the end of the
1568   string. */
1569
1570PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
1571    PyObject *s,                /* String to partition */
1572    PyObject *sep               /* String separator */
1573    );
1574
1575/* Split a string giving a list of Unicode strings.
1576
1577   If sep is NULL, splitting will be done at all whitespace
1578   substrings. Otherwise, splits occur at the given separator.
1579
1580   At most maxsplit splits will be done. But unlike PyUnicode_Split
1581   PyUnicode_RSplit splits from the end of the string. If negative,
1582   no limit is set.
1583
1584   Separators are not included in the resulting list.
1585
1586*/
1587
1588PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1589    PyObject *s,                /* String to split */
1590    PyObject *sep,              /* String separator */
1591    Py_ssize_t maxsplit         /* Maxsplit count */
1592    );
1593
1594/* Translate a string by applying a character mapping table to it and
1595   return the resulting Unicode object.
1596
1597   The mapping table must map Unicode ordinal integers to Unicode
1598   ordinal integers or None (causing deletion of the character).
1599
1600   Mapping tables may be dictionaries or sequences. Unmapped character
1601   ordinals (ones which cause a LookupError) are left untouched and
1602   are copied as-is.
1603
1604*/
1605
1606PyAPI_FUNC(PyObject *) PyUnicode_Translate(
1607    PyObject *str,              /* String */
1608    PyObject *table,            /* Translate table */
1609    const char *errors          /* error handling */
1610    );
1611
1612/* Join a sequence of strings using the given separator and return
1613   the resulting Unicode string. */
1614
1615PyAPI_FUNC(PyObject*) PyUnicode_Join(
1616    PyObject *separator,        /* Separator string */
1617    PyObject *seq               /* Sequence object */
1618    );
1619
1620/* Return 1 if substr matches str[start:end] at the given tail end, 0
1621   otherwise. */
1622
1623PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
1624    PyObject *str,              /* String */
1625    PyObject *substr,           /* Prefix or Suffix string */
1626    Py_ssize_t start,           /* Start index */
1627    Py_ssize_t end,             /* Stop index */
1628    int direction               /* Tail end: -1 prefix, +1 suffix */
1629    );
1630
1631/* Return the first position of substr in str[start:end] using the
1632   given search direction or -1 if not found. -2 is returned in case
1633   an error occurred and an exception is set. */
1634
1635PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
1636    PyObject *str,              /* String */
1637    PyObject *substr,           /* Substring to find */
1638    Py_ssize_t start,           /* Start index */
1639    Py_ssize_t end,             /* Stop index */
1640    int direction               /* Find direction: +1 forward, -1 backward */
1641    );
1642
1643/* Like PyUnicode_Find, but searches for a single character only. */
1644PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
1645    PyObject *str,              /* String */
1646    Py_UCS4 ch,                 /* Character to find */
1647    Py_ssize_t start,           /* Start index */
1648    Py_ssize_t end,             /* Stop index */
1649    int direction               /* Find direction: +1 forward, -1 backward */
1650    );
1651
1652/* Count the number of occurrences of substr in str[start:end]. */
1653
1654PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
1655    PyObject *str,              /* String */
1656    PyObject *substr,           /* Substring to count */
1657    Py_ssize_t start,           /* Start index */
1658    Py_ssize_t end              /* Stop index */
1659    );
1660
1661/* Replace at most maxcount occurrences of substr in str with replstr
1662   and return the resulting Unicode object. */
1663
1664PyAPI_FUNC(PyObject *) PyUnicode_Replace(
1665    PyObject *str,              /* String in which occurrences are replaced */
1666    PyObject *substr,           /* Substring to find */
1667    PyObject *replstr,          /* Replacement string substituted for substr */
1668    Py_ssize_t maxcount         /* Max. number of replacements to apply;
1669                                   -1 = all */
1670    );
1671
1672/* Compare two strings and return -1, 0, 1 for less than, equal, and
1673   greater than, respectively. -1 is also returned on failure, so call
   PyErr_Occurred() to check for errors. */
1674
1675PyAPI_FUNC(int) PyUnicode_Compare(
1676    PyObject *left,             /* Left string (first operand of the comparison) */
1677    PyObject *right             /* Right string (second operand of the comparison) */
1678    );
1679
/* Compare the object left with the C string right.
   NOTE(review): the result convention is presumably the -1/0/1 ordering of
   PyUnicode_Compare; this header does not state it -- confirm. */
1680PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
1681    PyObject *left,             /* Left operand, passed as an object */
1682    const char *right           /* ASCII-encoded string */
1683    );
1684
1685/* Rich compare two strings and return one of the following:
1686
1687   - NULL in case an exception was raised
1688   - Py_True or Py_False for successful comparisons
1689   - Py_NotImplemented in case the type combination is unknown
1690
1691   Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1692   case the conversion of the arguments to Unicode fails with a
1693   UnicodeDecodeError.
1694
1695   Possible values for op:
1696
1697     Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1698
1699*/
1700
1701PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
1702    PyObject *left,             /* Left string */
1703    PyObject *right,            /* Right string */
1704    int op                      /* Operation: one of Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE */
1705    );
1706
1707/* Apply an argument tuple or dictionary to a format string and return
1708   the resulting Unicode string. */
1709
1710PyAPI_FUNC(PyObject *) PyUnicode_Format(
1711    PyObject *format,           /* Format string the arguments are applied to */
1712    PyObject *args              /* Argument tuple or dictionary */
1713    );
1714
1715/* Check whether element is contained in container and return 1/0
1716   accordingly.
1717
1718   element has to coerce to a one-element Unicode string. -1 is
1719   returned in case of an error. */
1720
1721PyAPI_FUNC(int) PyUnicode_Contains(
1722    PyObject *container,        /* Container string that is searched */
1723    PyObject *element           /* Element string looked for in container */
1724    );
1725
1726/* Checks whether argument is a valid identifier. */
1727
/* NOTE(review): the int return convention is not spelled out in this header
   (presumably nonzero when s is a valid identifier) -- confirm. */
1728PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
1729
1730#ifndef Py_LIMITED_API
1731/* Externally visible for str.strip(unicode).
   Only declared when Py_LIMITED_API is not defined. */
1732PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1733    PyUnicodeObject *self,      /* NOTE(review): presumably the string being stripped -- confirm */
1734    int striptype,              /* NOTE(review): presumably selects which end(s) to strip -- confirm */
1735    PyObject *sepobj            /* NOTE(review): presumably the characters to strip -- confirm */
1736    );
1737#endif
1738
1739/* Using the current locale, insert the thousands grouping
1740   into the string pointed to by buffer.  For the argument descriptions,
1741   see Objects/stringlib/localeutil.h */
1742
1743#ifndef Py_LIMITED_API
/* Only declared when Py_LIMITED_API is not defined.  buffer and digits are
   typed Py_UNICODE pointers here, whereas _PyUnicode_InsertThousandsGrouping
   takes a kind plus untyped void pointers. */
1744PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
1745                                                   Py_ssize_t n_buffer,
1746                                                   Py_UNICODE *digits,
1747                                                   Py_ssize_t n_digits,
1748                                                   Py_ssize_t min_width);
1749#endif
1750
1751/* Using explicit passed-in values, insert the thousands grouping
1752   into the string pointed to by buffer.  For the argument descriptions,
1753   see Objects/stringlib/localeutil.h */
1754#ifndef Py_LIMITED_API
/* Only declared when Py_LIMITED_API is not defined. */
1755PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
1756    int kind,                   /* NOTE(review): presumably describes the character width of
                                   the untyped buffer/digits data -- confirm */
1757    void *buffer,
1758    Py_ssize_t n_buffer,
1759    void *digits,
1760    Py_ssize_t n_digits,
1761    Py_ssize_t min_width,
1762    const char *grouping,       /* Explicit value passed in by the caller */
1763    const char *thousands_sep); /* Explicit value passed in by the caller */
1764#endif
1765/* === Characters Type APIs =============================================== */
1766
1767/* Helper array used by Py_UNICODE_ISSPACE(). */
1768
1769#ifndef Py_LIMITED_API
/* Exported as read-only bytes; the array length is not visible from this
   declaration. */
1770PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1771
1772/* These should not be used directly. Use the Py_UNICODE_IS* and
1773   Py_UNICODE_TO* macros instead.
1774
1775   These APIs are implemented in Objects/unicodectype.c.
1776
1777*/
1778
/* Is* tests, part 1: each takes one Unicode character (Py_UCS4) and returns
   an int.  NOTE(review): the exact nonzero/zero convention is not stated in
   this header -- confirm in Objects/unicodectype.c. */
1779PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1780    Py_UCS4 ch       /* Unicode character */
1781    );
1782
1783PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1784    Py_UCS4 ch       /* Unicode character */
1785    );
1786
1787PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1788    Py_UCS4 ch       /* Unicode character */
1789    );
1790
1791PyAPI_FUNC(int) _PyUnicode_IsXidStart(
1792    Py_UCS4 ch       /* Unicode character */
1793    );
1794
1795PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
1796    Py_UCS4 ch       /* Unicode character */
1797    );
1798
/* NOTE(review): the next two prototypes qualify the by-value parameter as
   const, unlike their siblings; this makes no difference to callers. */
1799PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1800    const Py_UCS4 ch         /* Unicode character */
1801    );
1802
1803PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1804    const Py_UCS4 ch         /* Unicode character */
1805    );
1806
/* To* conversions of one Unicode character: the three case conversions
   return a Py_UCS4, ToDecimalDigit/ToDigit return an int and ToNumeric
   returns a double.  NOTE(review): what is returned for a character without
   such a value is not stated in this header -- confirm. */
1807PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1808    Py_UCS4 ch       /* Unicode character */
1809    );
1810
1811PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1812    Py_UCS4 ch       /* Unicode character */
1813    );
1814
1815PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1816    Py_UCS4 ch       /* Unicode character */
1817    );
1818
1819PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1820    Py_UCS4 ch       /* Unicode character */
1821    );
1822
1823PyAPI_FUNC(int) _PyUnicode_ToDigit(
1824    Py_UCS4 ch       /* Unicode character */
1825    );
1826
1827PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1828    Py_UCS4 ch       /* Unicode character */
1829    );
1830
/* Is* tests, part 2: each takes one Unicode character (Py_UCS4) and returns
   an int.  NOTE(review): the exact nonzero/zero convention is not stated in
   this header -- confirm in Objects/unicodectype.c. */
1831PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1832    Py_UCS4 ch       /* Unicode character */
1833    );
1834
1835PyAPI_FUNC(int) _PyUnicode_IsDigit(
1836    Py_UCS4 ch       /* Unicode character */
1837    );
1838
1839PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1840    Py_UCS4 ch       /* Unicode character */
1841    );
1842
1843PyAPI_FUNC(int) _PyUnicode_IsPrintable(
1844    Py_UCS4 ch       /* Unicode character */
1845    );
1846
1847PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1848    Py_UCS4 ch       /* Unicode character */
1849    );
1850
/* Py_UNICODE string helpers named after the <string.h> functions in their
   suffix.  NOTE(review): presumably they mirror those functions on
   NUL-terminated Py_UNICODE buffers, with n counted in Py_UNICODE units --
   confirm against the implementation.
   In every prototype s1 is a mutable pointer while s2, s and u are const. */
1851PyAPI_FUNC(size_t) Py_UNICODE_strlen(
1852    const Py_UNICODE *u
1853    );
1854
1855PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
1856    Py_UNICODE *s1,
1857    const Py_UNICODE *s2);
1858
1859PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
1860    Py_UNICODE *s1, const Py_UNICODE *s2);
1861
1862PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
1863    Py_UNICODE *s1,
1864    const Py_UNICODE *s2,
1865    size_t n);
1866
1867PyAPI_FUNC(int) Py_UNICODE_strcmp(
1868    const Py_UNICODE *s1,
1869    const Py_UNICODE *s2
1870    );
1871
1872PyAPI_FUNC(int) Py_UNICODE_strncmp(
1873    const Py_UNICODE *s1,
1874    const Py_UNICODE *s2,
1875    size_t n
1876    );
1877
/* Note: strchr/strrchr take a const string but return a non-const pointer. */
1878PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
1879    const Py_UNICODE *s,
1880    Py_UNICODE c
1881    );
1882
1883PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
1884    const Py_UNICODE *s,
1885    Py_UNICODE c
1886    );
1887
/* Py_UCS4 string helpers; the prototypes parallel the Py_UNICODE_str*
   family with Py_UCS4 as the element type.  NOTE(review): presumably they
   mirror the <string.h> functions named in their suffix on NUL-terminated
   Py_UCS4 buffers -- confirm against the implementation. */
1888PyAPI_FUNC(size_t) Py_UCS4_strlen(
1889    const Py_UCS4 *u
1890    );
1891
1892PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcpy(
1893    Py_UCS4 *s1,
1894    const Py_UCS4 *s2);
1895
1896PyAPI_FUNC(Py_UCS4*) Py_UCS4_strcat(
1897    Py_UCS4 *s1, const Py_UCS4 *s2);
1898
1899PyAPI_FUNC(Py_UCS4*) Py_UCS4_strncpy(
1900    Py_UCS4 *s1,
1901    const Py_UCS4 *s2,
1902    size_t n);
1903
1904PyAPI_FUNC(int) Py_UCS4_strcmp(
1905    const Py_UCS4 *s1,
1906    const Py_UCS4 *s2
1907    );
1908
1909PyAPI_FUNC(int) Py_UCS4_strncmp(
1910    const Py_UCS4 *s1,
1911    const Py_UCS4 *s2,
1912    size_t n
1913    );
1914
/* Note: strchr/strrchr take a const string but return a non-const pointer. */
1915PyAPI_FUNC(Py_UCS4*) Py_UCS4_strchr(
1916    const Py_UCS4 *s,
1917    Py_UCS4 c
1918    );
1919
1920PyAPI_FUNC(Py_UCS4*) Py_UCS4_strrchr(
1921    const Py_UCS4 *s,
1922    Py_UCS4 c
1923    );
1924
1925/* Create a copy of a unicode string ending with a nul character. Return NULL
1926   and raise a MemoryError exception on memory allocation failure, otherwise
1927   return a newly allocated buffer (use PyMem_Free() to free the buffer). */
1928
1929PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
1930    PyObject *unicode           /* Unicode string to copy */
1931    );
1932#endif /* Py_LIMITED_API */
1933
1934#ifdef __cplusplus
1935}
1936#endif
1937#endif /* !Py_UNICODEOBJECT_H */
1938