/*
**********************************************************************
*   Copyright (C) 1999-2009 International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
*
*  ucnv_bld.h:
*  Contains internal data structure definitions
* Created by Bertrand A. Damiba
*
*   Change history:
*
*   06/29/2000  helena      Major rewrite of the callback APIs.
*/

17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef UCNV_BLD_H
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_BLD_H
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_err.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnvmbcs.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_ext.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "udataswp.h"
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* size of the overflow buffers in UConverter, enough for escaping callbacks */
#define UCNV_ERROR_BUFFER_LENGTH 32

/* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */
#define UCNV_MAX_SUBCHAR_LEN 4

/* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */
#define UCNV_MAX_CHAR_LEN 8

/* converter options bits */
#define UCNV_OPTION_VERSION     0xf     /* mask for the version bits (low 4 bits) */
#define UCNV_OPTION_SWAP_LFNL   0x10

/*
 * Extract the version bits from a converter's options field.
 * cnv must be a pointer to an object with an integer "options" member
 * (such as struct UConverter); the result is options masked with
 * UCNV_OPTION_VERSION.
 */
#define UCNV_GET_VERSION(cnv) ((cnv)->options & UCNV_OPTION_VERSION)

46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 itself is compiled under C++, the linkage of the funcptrs will
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 work.
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              */
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruunion UConverterTable {
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverterMBCSTable mbcs;
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef union UConverterTable UConverterTable;
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UConverterImpl;
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct UConverterImpl UConverterImpl;
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/** values for the unicodeMask (bit flags, see UConverterStaticData.unicodeMask) */
#define UCNV_HAS_SUPPLEMENTARY 1    /* bit 0: has supplementary */
#define UCNV_HAS_SURROGATES    2    /* bit 1: has single surrogates */

64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct UConverterStaticData {   /* +offset: size */
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t structSize;                /* +0: 4 Size of this structure */
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char name
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* +4: 60  internal name of the converter- invariant chars */
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t codepage;               /* +64: 4 codepage # (now IBM-$codepage) */
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t platform;                /* +68: 1 platform of the converter (only IBM now) */
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t conversionType;          /* +69: 1 conversion type */
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t minBytesPerChar;         /* +70: 1 Minimum # bytes per char in this codepage */
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t maxBytesPerChar;         /* +71: 1 Maximum # bytes output per UChar in this codepage */
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4  [note:  4 and 8 byte boundary] */
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t subCharLen;              /* +76: 1 */
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t hasToUnicodeFallback;   /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t hasFromUnicodeFallback; /* +78: 1 */
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t unicodeMask;            /* +79: 1  bit 0: has supplementary  bit 1: has single surrogates */
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t subChar1;               /* +80: 1  single-byte substitution character for IBM MBCS (0 if none) */
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t reserved[19];           /* +81: 19 to round out the structure */
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    /* total size: 100 */
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} UConverterStaticData;
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Defines the UConverterSharedData struct,
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the immutable, shared part of UConverter.
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UConverterSharedData {
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t structSize;            /* Size of this structure */
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t referenceCounter;      /* used to count number of clients, 0xffffffff for static SharedData */
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const void *dataMemory;         /* from udata_openChoice() - for cleanup */
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void *table;                    /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool                sharedDataCached;   /* TRUE:  shared data is in cache, don't destroy on ucnv_close() if 0 ref.  FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  /*UBool               staticDataOwned;   TRUE if static data owned by shared data & should be freed with it, NEVER true for udata() loaded statics. This ignored variable was removed to make space for sharedDataCached.   */
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UConverterImpl *impl;     /* vtable-style struct of mostly function pointers */
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*initial values of some members of the mutable part of object */
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t toUnicodeStatus;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Shared data structures currently come in two flavors:
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - readonly for built-in algorithmic converters
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - allocated for MBCS, with a pointer to an allocated UConverterTable
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   which always has a UConverterMBCSTable
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * To eliminate one allocation, I am making the UConverterMBCSTable
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * a member of the shared data. It is the last member so that static
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * definitions of UConverterSharedData work as before.
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The table field above also remains to avoid updating all static
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * definitions, but is now unused.
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * markus 2003-nov-07
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverterMBCSTable mbcs;
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Defines a UConverter, the lightweight mutable part the user sees */
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UConverter {
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Error function pointer called when conversion issues
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * occur during a ucnv_fromUnicode call
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void (U_EXPORT2 *fromUCharErrorBehaviour) (const void *context,
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     UConverterFromUnicodeArgs *args,
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     const UChar *codeUnits,
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     int32_t length,
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     UChar32 codePoint,
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     UConverterCallbackReason reason,
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     UErrorCode *);
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Error function pointer called when conversion issues
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * occur during a ucnv_toUnicode call
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void (U_EXPORT2 *fromCharErrorBehaviour) (const void *context,
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    UConverterToUnicodeArgs *args,
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    const char *codeUnits,
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    int32_t length,
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    UConverterCallbackReason reason,
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    UErrorCode *);
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Pointer to additional data that depends on the converter type.
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Used by ISO 2022, SCSU, GB 18030 converters, possibly more.
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void *extraInfo;
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const void *fromUContext;
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const void *toUContext;
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Pointer to charset bytes for substitution string if subCharLen>0,
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * or pointer to Unicode string (UChar *) if subCharLen<0.
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * subCharLen==0 is equivalent to using a skip callback.
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * If the pointer is !=subUChars then it is allocated with
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes.
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The subUChars field is declared as UChar[] not uint8_t[] to
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * guarantee alignment for UChars.
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *subChars;
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverterSharedData *sharedData;   /* Pointer to the shared immutable part of the converter object */
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t options; /* options flags from UConverterOpen, may contain additional bits */
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool sharedDataIsCached;  /* TRUE:  shared data is in cache, don't destroy on ucnv_close() if 0 ref.  FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isCopyLocal;  /* TRUE if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isExtraLocal; /* TRUE if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool  useFallback;
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t toULength;                   /* number of bytes in toUBytes */
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character */
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t toUnicodeStatus;           /* Used to internalize stream status information */
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t mode;
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t fromUnicodeStatus;
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * More fromUnicode() status. Serves 3 purposes:
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - keeps a lead surrogate between buffers (similar to toUBytes[])
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - keeps a lead surrogate at the end of the stream,
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   which the framework handles as truncated input
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - if the fromUnicode() implementation returns to the framework
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   (ucnv.c ucnv_fromUnicode()), then the framework calls the callback
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   for this code point
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 fromUChar32;
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * value for ucnv_getMaxCharSize()
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * usually simply copied from the static data, but ucnvmbcs.c modifies
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the value depending on the converter type and options
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t maxBytesPerUChar;
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t subCharLen;                  /* length of the codepage specific character sequence */
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t invalidCharLength;
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t charErrorBufferLength;       /* number of valid bytes in charErrorBuffer */
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t invalidUCharLength;
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t UCharErrorBufferLength;      /* number of valid UChars in charErrorBuffer */
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t subChar1;                                   /* single-byte substitution character if different from subChar */
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool useSubChar1;
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char invalidCharBuffer[UCNV_MAX_CHAR_LEN];          /* bytes from last error/callback situation */
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH];  /* codepage output from Error functions */
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* see subChars documentation */
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar invalidUCharBuffer[U16_MAX_LENGTH];           /* UChars from last error/callback situation */
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH];   /* unicode output from Error functions */
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* fields for conversion extension */
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* store previous UChars/chars to continue partial matches */
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 preFromUFirstCP;                /* >=0: partial match */
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar preFromU[UCNV_EXT_MAX_UCHARS];
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char preToU[UCNV_EXT_MAX_BYTES];
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t preFromULength, preToULength;    /* negative: replay */
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t preToUFirstLength;               /* length of first character */
23085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
23185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* new fields for ICU 4.0 */
23285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END /* end of UConverter */
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* File name extension of converter data files. */
#define CONVERTER_FILE_EXTENSION ".cnv"


240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the number of all converter names.
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pErrorCode The error code
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the number of all converter names
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC uint16_t
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_bld_countAvailableConverters(UErrorCode *pErrorCode);
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the (n)th converter name in mixed case, or NULL
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if there is none (typically, if the data cannot be loaded).
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0<=index<ucnv_io_countAvailableConverters().
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param n The number specifies which converter name to get
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pErrorCode The error code
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return the (n)th converter name in mixed case, or NULL if there is none.
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC const char *
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode);
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Load a non-algorithmic converter.
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverterSharedData *
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err);
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Unload a non-algorithmic converter.
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * It must be sharedData->referenceCounter != ~0
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and this function must be called inside umtx_lock(&cnvCacheMutex).
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_unload(UConverterSharedData *sharedData);
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Swap ICU .cnv conversion tables. See udataswp.h.
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @internal
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_swap(const UDataSwapper *ds,
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          const void *inData, int32_t length, void *outData,
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          UErrorCode *pErrorCode);
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
#endif /* !UCONFIG_NO_CONVERSION */

#endif /* UCNV_BLD_H */