1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 1999-2011, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ucnv_cnv.h: 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Definitions for converter implementations. 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Modification History: 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 05/09/00 helena Added implementation to handle fallback mappings. 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 06/29/2000 helena Major rewrite of the callback APIs. 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef UCNV_CNV_H 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_CNV_H 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_err.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uset.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uset_imp.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* this is used in fromUnicode DBCS tables as an "unassigned" marker */ 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define missingCharMarker 0xFFFF 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * #define missingUCharMarker 0xfffe 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * commented out because there are actually two values used in toUnicode tables: 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * U+fffe "unassigned" 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * U+ffff "illegal" 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** Forward declaration, see ucnv_bld.h */ 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UConverterSharedData; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct UConverterSharedData UConverterSharedData; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* function types for UConverterImpl ---------------------------------------- */ 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* struct with arguments for UConverterLoad and ucnv_load() */ 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct { 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t size; /* sizeof(UConverterLoadArgs) */ 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t nestedLoads; /* count nested ucnv_load() calls */ 5285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UBool onlyTestIsLoadable; /* input: don't actually load */ 5385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UBool reserved0; /* reserved - for good alignment of the pointers */ 5485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int16_t reserved; /* reserved - for good alignment of the pointers */ 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t options; 5685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char *pkg, *name, *locale; 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} UConverterLoadArgs; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 5983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define UCNV_LOAD_ARGS_INITIALIZER \ 6083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius { (int32_t)sizeof(UConverterLoadArgs), 0, FALSE, FALSE, 0, 0, NULL, NULL, NULL } 6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterLoad) (UConverterSharedData *sharedData, 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterLoadArgs *pArgs, 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *raw, UErrorCode *pErrorCode); 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterUnload) (UConverterSharedData *sharedData); 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 6785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hotypedef void (*UConverterOpen) (UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *pErrorCode); 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterClose) (UConverter *cnv); 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum UConverterResetChoice { 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_RESET_BOTH, 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_RESET_TO_UNICODE, 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_RESET_FROM_UNICODE 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} UConverterResetChoice; 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice); 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Converter implementation function(s) for ucnv_toUnicode(). 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If the toUnicodeWithOffsets function pointer is NULL, 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * then the toUnicode function will be used and the offsets will be set to -1. 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Must maintain state across buffers. Use toUBytes[toULength] for partial input 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sequences; it will be checked in ucnv.c at the end of the input stream 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to detect truncated input. 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Some converters may need additional detection and may then set U_TRUNCATED_CHAR_FOUND. 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The toUnicodeWithOffsets must write exactly as many offset values as target 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * units. Write offset values of -1 for when the source index corresponding to 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the output unit is not known (e.g., the character started in an earlier buffer). 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The pArgs->offsets pointer need not be moved forward. 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At function return, either one of the following conditions must be true: 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - another error code with toUBytes[toULength] set to the offending input 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - no error, and the source is consumed: source==sourceLimit 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The ucnv.c code will handle the end of the input (reset) 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (reset, and truncation detection) and callbacks. 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *); 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Same rules as for UConverterToUnicode. 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A lead surrogate is kept in fromUChar32 across buffers, and if an error 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * occurs, then the offending input code point must be put into fromUChar32 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * as well. 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *); 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Converter implementation function for ucnv_convertEx(), for direct conversion 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * between two charsets without pivoting through UTF-16. 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The rules are the same as for UConverterToUnicode and UConverterFromUnicode. 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In addition, 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - The toUnicode side must behave and keep state exactly like the 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UConverterToUnicode implementation for the same source charset. 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - A U_USING_DEFAULT_WARNING can be set to request to temporarily fall back 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to pivoting. When this function is called, the conversion framework makes 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sure that this warning is not set on input. 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - Continuing a partial match and flushing the toUnicode replay buffer 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are handled by pivoting, using the toUnicode and fromUnicode functions. 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterConvert) (UConverterFromUnicodeArgs *pFromUArgs, 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterToUnicodeArgs *pToUArgs, 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode); 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Converter implementation function for ucnv_getNextUChar(). 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If the function pointer is NULL, then the toUnicode function will be used. 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Will be called at a character boundary (toULength==0). 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * May return with 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (the return value will be ignored) 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR!) 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with toUBytes[toULength] set to the offending input 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (the return value will be ignored) 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer, 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to indicate that the ucnv.c code shall call the toUnicode function instead 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - return a real code point result 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be consumed. 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The ucnv.c code will handle the end of the input (reset) 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (except for truncation detection!) and callbacks. 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode *); 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterGetStarters)(const UConverter* converter, 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool starters[256], 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode); 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* If this function pointer is null or if the function returns null 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the name field in static data struct should be returned by 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ucnv_getName() API function 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef const char * (*UConverterGetName) (const UConverter *cnv); 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Write the codepage substitution character. 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If this function is not set, then ucnv_cbFromUWriteSub() writes 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the substitution character from UConverter. 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For stateful converters, it is typically necessary to handle this 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * specificially for the converter in order to properly maintain the state. 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterWriteSub) (UConverterFromUnicodeArgs *pArgs, int32_t offsetIndex, UErrorCode *pErrorCode); 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For converter-specific safeClone processing 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If this function is not set, then ucnv_safeClone assumes that the converter has no private data that changes 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * after the converter is done opening. 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If this function is set, then it is called just after a memcpy() of 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * converter data to the new, empty converter, and is expected to set up 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the initial state of the converter. It is not expected to increment the 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reference counts of the standard data types such as the shared data. 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef UConverter * (*UConverterSafeClone) (const UConverter *cnv, 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void *stackBuffer, 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *pBufferSize, 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *status); 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 184c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Filters for some ucnv_getUnicodeSet() implementation code. 185c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 186c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Querutypedef enum UConverterSetFilter { 187c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UCNV_SET_FILTER_NONE, 188c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UCNV_SET_FILTER_DBCS_ONLY, 189c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UCNV_SET_FILTER_2022_CN, 190c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UCNV_SET_FILTER_SJIS, 191c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UCNV_SET_FILTER_GR94DBCS, 192c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UCNV_SET_FILTER_HZ, 193c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UCNV_SET_FILTER_COUNT 194c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru} UConverterSetFilter; 195c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 196c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru/** 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Fills the set of Unicode code points that can be converted by an ICU converter. 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The API function ucnv_getUnicodeSet() clears the USet before calling 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the converter's getUnicodeSet() implementation; the converter should only 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * add the appropriate code points to allow recursive use. 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For example, the ISO-2022-JP converter will call each subconverter's 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * getUnicodeSet() implementation to consecutively add code points to 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the same USet, which will result in a union of the sets of all subconverters. 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For more documentation, see ucnv_getUnicodeSet() in ucnv.h. 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef void (*UConverterGetUnicodeSet) (const UConverter *cnv, 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const USetAdder *sa, 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterUnicodeSet which, 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode); 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool CONVERSION_U_SUCCESS (UErrorCode err); 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UConverterImpl contains all the data and functions for a converter type. 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Its function pointers work much like a C++ vtable. 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Many converter types need to define only a subset of the functions; 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * when a function pointer is NULL, then a default action will be performed. 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Every converter type must implement toUnicode, fromUnicode, and getNextUChar, 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * otherwise the converter may crash. 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Every converter type that has variable-length codepage sequences should 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * correct offset handling. 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * All other functions may or may not be implemented - it depends only on 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * whether the converter type needs them. 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When open() fails, then close() will be called, if present. 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UConverterImpl { 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterType type; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterLoad load; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterUnload unload; 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterOpen open; 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterClose close; 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterReset reset; 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterToUnicode toUnicode; 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterToUnicode toUnicodeWithOffsets; 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterFromUnicode fromUnicode; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterFromUnicode fromUnicodeWithOffsets; 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterGetNextUChar getNextUChar; 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterGetStarters getStarters; 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterGetName getName; 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterWriteSub writeSub; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSafeClone safeClone; 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterGetUnicodeSet getUnicodeSet; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterConvert toUTF8; 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterConvert fromUTF8; 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern const UConverterSharedData 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _MBCSData, _Latin1Data, 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Data, 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6, 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19, 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _HZData,_ISCIIData, _SCSUData, _ASCIIData, 263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData; 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** Always use fallbacks from codepage to Unicode */ 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define TO_U_USE_FALLBACK(useFallback) TRUE 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_TO_U_USE_FALLBACK(cnv) TRUE 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** Use fallbacks from Unicode to codepage when cnv->useFallback or for private-use code points */ 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000) 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c)) 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c) 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Magic number for ucnv_getNextUChar(), returned by a 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * getNextUChar() implementation to indicate to use the converter's toUnicode() 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * instead of the native function. 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_GET_NEXT_UCHAR_USE_TO_U -9 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_getCompleteUnicodeSet(const UConverter *cnv, 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const USetAdder *sa, 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterUnicodeSet which, 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode); 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const USetAdder *sa, 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterUnicodeSet which, 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode); 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_fromUWriteBytes(UConverter *cnv, 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *bytes, int32_t length, 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char **target, const char *targetLimit, 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode); 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_toUWriteUChars(UConverter *cnv, 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *uchars, int32_t length, 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar **target, const UChar *targetLimit, 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode); 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_toUWriteCodePoint(UConverter *cnv, 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar **target, const UChar *targetLimit, 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode); 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* UCNV_CNV */ 322