cdef extern from *:

    ###################################################################
    # Type checks and unchecked accessors
    ###################################################################

    # True when o is a Unicode object or an instance of a Unicode
    # subtype. Subtypes have been accepted since version 2.2.
    bint PyUnicode_Check(object o)

    # True only when o is a Unicode object proper; instances of
    # subtypes do not qualify. New in version 2.2.
    bint PyUnicode_CheckExact(object o)

    # Size of the object. o must be a PyUnicodeObject; this is not
    # checked.
    Py_ssize_t PyUnicode_GET_SIZE(object o)

    # Size, in bytes, of the object's internal buffer. o must be a
    # PyUnicodeObject; this is not checked.
    Py_ssize_t PyUnicode_GET_DATA_SIZE(object o)

    # Pointer to the object's internal Py_UNICODE buffer. o must be a
    # PyUnicodeObject; this is not checked.
    Py_UNICODE *PyUnicode_AS_UNICODE(object o)

    # Pointer to the object's internal buffer. o must be a
    # PyUnicodeObject; this is not checked.
    char *PyUnicode_AS_DATA(object o)

    ###################################################################
    # Character classification (each yields 1 or 0)
    ###################################################################

    # 1 if ch is a whitespace character, otherwise 0.
    bint Py_UNICODE_ISSPACE(Py_UNICODE ch)

    # 1 if ch is a lowercase character, otherwise 0.
    bint Py_UNICODE_ISLOWER(Py_UNICODE ch)

    # 1 if ch is an uppercase character, otherwise 0.
    bint Py_UNICODE_ISUPPER(Py_UNICODE ch)

    # 1 if ch is a titlecase character, otherwise 0.
    bint Py_UNICODE_ISTITLE(Py_UNICODE ch)

    # 1 if ch is a linebreak character, otherwise 0.
    bint Py_UNICODE_ISLINEBREAK(Py_UNICODE ch)

    # 1 if ch is a decimal character, otherwise 0.
    bint Py_UNICODE_ISDECIMAL(Py_UNICODE ch)

    # 1 if ch is a digit character, otherwise 0.
    bint Py_UNICODE_ISDIGIT(Py_UNICODE ch)

    # 1 if ch is a numeric character, otherwise 0.
    bint Py_UNICODE_ISNUMERIC(Py_UNICODE ch)

    # 1 if ch is an alphabetic character, otherwise 0.
    bint Py_UNICODE_ISALPHA(Py_UNICODE ch)

    # 1 if ch is an alphanumeric character, otherwise 0.
    bint Py_UNICODE_ISALNUM(Py_UNICODE ch)

    ###################################################################
    # Character conversion
    ###################################################################

    # ch converted to lower case.
    Py_UNICODE Py_UNICODE_TOLOWER(Py_UNICODE ch)

    # ch converted to upper case.
    Py_UNICODE Py_UNICODE_TOUPPER(Py_UNICODE ch)

    # ch converted to title case.
    Py_UNICODE Py_UNICODE_TOTITLE(Py_UNICODE ch)

    # ch converted to a decimal positive integer, or -1 when that is
    # not possible. This macro does not raise exceptions.
    int Py_UNICODE_TODECIMAL(Py_UNICODE ch)

    # ch converted to a single digit integer, or -1 when that is not
    # possible. This macro does not raise exceptions.
    int Py_UNICODE_TODIGIT(Py_UNICODE ch)

    # ch converted to a double, or -1.0 when that is not possible.
    # This macro does not raise exceptions.
    double Py_UNICODE_TONUMERIC(Py_UNICODE ch)

    ###################################################################
    # Creating Unicode objects and reading their basic sequence
    # properties
    ###################################################################

    # Build a Unicode object from the Py_UNICODE buffer u of the given
    # size; the buffer is copied into the new object. u may be NULL,
    # in which case the contents are undefined and filling in the
    # needed data is the caller's responsibility. When u is not NULL
    # the result might be a shared object, so the returned Unicode
    # object may only be modified when u is NULL.
    object PyUnicode_FromUnicode(Py_UNICODE *u, Py_ssize_t size)

    # Build a Unicode object from a Unicode code point ordinal.
    #
    # Narrow Python builds (UCS2) require the ordinal to be in
    # range(0x10000); wide builds (UCS4) require range(0x110000).
    # A ValueError is raised for anything outside that range.
    object PyUnicode_FromOrdinal(int ordinal)

    # Read-only pointer to the Unicode object's internal Py_UNICODE
    # buffer; NULL if the argument is not a Unicode object.
    Py_UNICODE *PyUnicode_AsUnicode(object o) except NULL

    # Length of the Unicode object.
    Py_ssize_t PyUnicode_GetSize(object o) except -1

    # Coerce an encoded object to a Unicode object, returning a
    # reference with incremented refcount.
    # Strings and other char buffer compatible objects are decoded
    # with the given encoding, using the error handling named by
    # errors. Either may be NULL, in which case the interface falls
    # back to the default values (see the next section for details).
    # Every other kind of object, Unicode objects included, causes a
    # TypeError to be set.
    object PyUnicode_FromEncodedObject(
        object o, char *encoding, char *errors)

    # Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict"),
    # which the interpreter uses throughout whenever coercion to
    # Unicode is needed.
    object PyUnicode_FromObject(object obj)

    ###################################################################
    # wchar_t support (declarations currently commented out)
    #
    # If the platform supports wchar_t and provides a wchar.h header,
    # Python can interface directly to this type using the following
    # functions. Support is optimized if Python's own Py_UNICODE type
    # is identical to the system's wchar_t.
    ###################################################################

    #ctypedef int wchar_t

    # Create a Unicode object from the wchar_t buffer w of the given
    # size. Return NULL on failure.
    #PyObject* PyUnicode_FromWideChar(wchar_t *w, Py_ssize_t size)

    #Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size)

    ###################################################################
    # Codecs
    ###################################################################

    # Decode size bytes of the encoded string s into a Unicode object.
    # encoding and errors mean the same as the like-named parameters
    # of the unicode() builtin function. The codec is looked up
    # through the Python codec registry. NULL is returned if the codec
    # raised an exception.
    object PyUnicode_Decode(
        char *s, Py_ssize_t size, char *encoding, char *errors)

    # Encode the Py_UNICODE buffer of the given size into a Python
    # string object. encoding and errors mean the same as the
    # like-named parameters of the Unicode encode() method. The codec
    # is looked up through the Python codec registry. NULL is returned
    # if the codec raised an exception.
    object PyUnicode_Encode(Py_UNICODE *s, Py_ssize_t size, char *encoding, char *errors)

    # Encode a Unicode object into a Python string object. encoding
    # and errors mean the same as the like-named parameters of the
    # Unicode encode() method. The codec is looked up through the
    # Python codec registry. NULL is returned if the codec raised an
    # exception.
    object PyUnicode_AsEncodedString(
        object unicode, char *encoding, char *errors)

    ###################################################################
    # UTF-8 codec
    ###################################################################

    # Decode size bytes of the UTF-8 encoded string s into a Unicode
    # object. NULL is returned if the codec raised an exception.
    object PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors)

    # With consumed == NULL this behaves like PyUnicode_DecodeUTF8().
    # With a non-NULL consumed, trailing incomplete UTF-8 byte
    # sequences are not treated as an error: those bytes are left
    # undecoded and the number of bytes that were decoded is stored in
    # consumed. New in version 2.4.
    object PyUnicode_DecodeUTF8Stateful(
        char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)

    # Encode the Py_UNICODE buffer of the given size as UTF-8 into a
    # Python string object. NULL is returned if the codec raised an
    # exception.
    object PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)

    # Encode a Unicode object as UTF-8 into a Python string object.
    # Error handling is "strict". NULL is returned if the codec raised
    # an exception.
    object PyUnicode_AsUTF8String(object unicode)

    ###################################################################
    # UTF-16 codec
    ###################################################################

    # Decode size bytes from a UTF-16 encoded buffer and return the
    # corresponding Unicode object. errors, when non-NULL, selects the
    # error handling; the default is "strict".
    #
    # When byteorder is non-NULL the decoder starts out with the byte
    # order it points to:
    #
    #   *byteorder == -1: little endian
    #   *byteorder == 0:  native order
    #   *byteorder == 1:  big endian
    #
    # and then switches if the first two bytes of the input are a byte
    # order mark (BOM) and the specified byte order is native order.
    # The BOM is not copied into the resulting Unicode string. On
    # completion, *byteorder is set to the byte order that was current
    # when decoding finished.
    #
    # When byteorder is NULL the codec starts in native order mode.
    object PyUnicode_DecodeUTF16(
        char *s, Py_ssize_t size, char *errors, int *byteorder)

    # With consumed == NULL this behaves like PyUnicode_DecodeUTF16().
    # With a non-NULL consumed, trailing incomplete UTF-16 byte
    # sequences (such as an odd number of bytes or a split surrogate
    # pair) are not treated as an error: those bytes are left
    # undecoded and the number of bytes that were decoded is stored in
    # consumed. New in version 2.4.
    object PyUnicode_DecodeUTF16Stateful(
        char *s, Py_ssize_t size, char *errors,
        int *byteorder, Py_ssize_t *consumed)

    # Return a Python string object holding the UTF-16 encoding of the
    # Unicode data in s. The byteorder argument selects the output
    # byte order:
    #
    #   byteorder == -1: little endian
    #   byteorder == 0:  native byte order (writes a BOM mark)
    #   byteorder == 1:  big endian
    #
    # With byteorder == 0 the output string always starts with the
    # Unicode BOM mark (U+FEFF); in the other two modes no BOM mark is
    # prepended.
    #
    # If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may be
    # represented as a surrogate pair. If it is not defined, each
    # Py_UNICODE value is interpreted as a UCS-2 character.
    object PyUnicode_EncodeUTF16(
        Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)

    # Return a Python string holding the UTF-16 encoding in native
    # byte order; the string always starts with a BOM mark. Error
    # handling is "strict". NULL is returned if the codec raised an
    # exception.
    object PyUnicode_AsUTF16String(object unicode)

    ###################################################################
    # "Unicode Escape" codec
    ###################################################################

    # Decode size bytes of the Unicode-Escape encoded string s into a
    # Unicode object. NULL is returned if the codec raised an
    # exception.
    object PyUnicode_DecodeUnicodeEscape(
        char *s, Py_ssize_t size, char *errors)

    # Encode the Py_UNICODE buffer of the given size with
    # Unicode-Escape into a Python string object. NULL is returned if
    # the codec raised an exception.
    object PyUnicode_EncodeUnicodeEscape(Py_UNICODE *s, Py_ssize_t size)

    # Encode a Unicode object with Unicode-Escape into a Python string
    # object. Error handling is "strict". NULL is returned if the
    # codec raised an exception.
    object PyUnicode_AsUnicodeEscapeString(object unicode)

    ###################################################################
    # "Raw Unicode Escape" codec
    ###################################################################

    # Decode size bytes of the Raw-Unicode-Escape encoded string s
    # into a Unicode object. NULL is returned if the codec raised an
    # exception.
    object PyUnicode_DecodeRawUnicodeEscape(
        char *s, Py_ssize_t size, char *errors)

    # Encode the Py_UNICODE buffer of the given size with
    # Raw-Unicode-Escape into a Python string object. NULL is returned
    # if the codec raised an exception.
    object PyUnicode_EncodeRawUnicodeEscape(
        Py_UNICODE *s, Py_ssize_t size, char *errors)

    # Encode a Unicode object with Raw-Unicode-Escape into a Python
    # string object. Error handling is "strict". NULL is returned if
    # the codec raised an exception.
    object PyUnicode_AsRawUnicodeEscapeString(object unicode)

    ###################################################################
    # Latin-1 codec
    #
    # Latin-1 corresponds to the first 256 Unicode ordinals, and only
    # these are accepted by the codecs during encoding.
    ###################################################################

    # Decode size bytes of the Latin-1 encoded string s into a Unicode
    # object. NULL is returned if the codec raised an exception.
    object PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors)

    # Encode the Py_UNICODE buffer of the given size as Latin-1 into a
    # Python string object. NULL is returned if the codec raised an
    # exception.
    object PyUnicode_EncodeLatin1(
        Py_UNICODE *s, Py_ssize_t size, char *errors)

    # Encode a Unicode object as Latin-1 into a Python string object.
    # Error handling is "strict". NULL is returned if the codec raised
    # an exception.
    object PyUnicode_AsLatin1String(object unicode)

    ###################################################################
    # ASCII codec
    #
    # Only 7-bit ASCII data is accepted. All other codes generate
    # errors.
    ###################################################################

    # Decode size bytes of the ASCII encoded string s into a Unicode
    # object. NULL is returned if the codec raised an exception.
    object PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors)

    # Encode the Py_UNICODE buffer of the given size as ASCII into a
    # Python string object. NULL is returned if the codec raised an
    # exception.
    object PyUnicode_EncodeASCII(
        Py_UNICODE *s, Py_ssize_t size, char *errors)

    # Encode a Unicode object as ASCII into a Python string object.
    # Error handling is "strict". NULL is returned if the codec raised
    # an exception.
    object PyUnicode_AsASCIIString(object o)

    ###################################################################
    # Mapping codec
    #
    # This codec is special in that it can be used to implement many
    # different codecs (and this is in fact what was done to obtain
    # most of the standard codecs included in the encodings package).
    # The codec uses a mapping to encode and decode characters.
    #
    # Decoding mappings must map single string characters to single
    # Unicode characters, to integers (which are then interpreted as
    # Unicode ordinals) or to None (meaning "undefined mapping" and
    # causing an error).
    #
    # Encoding mappings must map single Unicode characters to single
    # string characters, to integers (which are then interpreted as
    # Latin-1 ordinals) or to None (meaning "undefined mapping" and
    # causing an error).
    #
    # The mapping objects provided only need to support the
    # __getitem__ mapping interface.
    #
    # If a character lookup fails with a LookupError, the character is
    # copied as-is, meaning that its ordinal value is interpreted as a
    # Unicode or Latin-1 ordinal respectively. Because of this,
    # mappings only need to contain the entries that map characters to
    # different code points.
    ###################################################################

    # Decode size bytes of the encoded string s into a Unicode object
    # using the given mapping object. NULL is returned if the codec
    # raised an exception. If mapping is NULL, latin-1 decoding is
    # done. Otherwise mapping can be a dictionary mapping bytes, or a
    # unicode string that is treated as a lookup table. Byte values
    # greater than the length of the string, and U+FFFE "characters",
    # are treated as "undefined mapping". Changed in version 2.4:
    # a unicode string is allowed as the mapping argument.
    object PyUnicode_DecodeCharmap(
        char *s, Py_ssize_t size, object mapping, char *errors)

    # Encode the Py_UNICODE buffer of the given size into a Python
    # string object using the given mapping object. NULL is returned
    # if the codec raised an exception.
    object PyUnicode_EncodeCharmap(
        Py_UNICODE *s, Py_ssize_t size, object mapping, char *errors)

    # Encode a Unicode object into a Python string object using the
    # given mapping object. Error handling is "strict". NULL is
    # returned if the codec raised an exception.
    object PyUnicode_AsCharmapString(object o, object mapping)

    ###################################################################
    # Unicode-to-Unicode translation
    #
    # The following codec API is special in that it maps Unicode to
    # Unicode.
    ###################################################################

    # Translate a Py_UNICODE buffer of the given length by applying a
    # character mapping table to it, and return the resulting Unicode
    # object. NULL is returned when the codec raised an exception.
    #
    # The mapping table must map Unicode ordinal integers to Unicode
    # ordinal integers or to None (which deletes the character).
    #
    # Mapping tables need only provide the __getitem__() interface;
    # dictionaries and sequences work well. Unmapped character
    # ordinals (those that cause a LookupError) are left untouched and
    # copied as-is.
    object PyUnicode_TranslateCharmap(
        Py_UNICODE *s, Py_ssize_t size, object table, char *errors)

    ###################################################################
    # MBCS codec
    #
    # These APIs are currently only available on Windows and use the
    # Win32 MBCS converters to implement the conversions. Note that
    # MBCS (or DBCS) is a class of encodings, not just one. The target
    # encoding is defined by the user settings on the machine running
    # the codec.
    ###################################################################

    # Decode size bytes of the MBCS encoded string s into a Unicode
    # object. NULL is returned if the codec raised an exception.
    object PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors)

    # With consumed == NULL this behaves like PyUnicode_DecodeMBCS().
    # With a non-NULL consumed, a trailing lead byte is not decoded
    # and the number of bytes that were decoded is stored in consumed.
    # New in version 2.5.
    # NOTE: Python 2.x uses 'int' values for 'size' and 'consumed'
    # (changed in 3.0).
    object PyUnicode_DecodeMBCSStateful(
        char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)

    # Encode the Py_UNICODE buffer of the given size as MBCS into a
    # Python string object. NULL is returned if the codec raised an
    # exception.
    object PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)

    # Encode a Unicode object as MBCS into a Python string object.
    # Error handling is "strict". NULL is returned if the codec raised
    # an exception.
    object PyUnicode_AsMBCSString(object o)