1/*
2**********************************************************************
3*   Copyright (C) 1999-2004, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6 *
7 *
8 *   ucnv_err.h:
9 */
10
11/**
12 * \file
13 * \brief C UConverter predefined error callbacks
14 *
15 *  <h2>Error Behaviour Functions</h2>
16 *  Defines some error behaviour functions called by ucnv_{from,to}Unicode
17 *  These are provided as part of ICU and many are stable, but they
18 *  can also be considered only as an example of what can be done with
19 *  callbacks.  You may of course write your own.
20 *
21 *  If you write your own callbacks, you may find the functions declared in
22 *  ucnv_cb.h useful.
23 *
24 *  These functions, although public, should NEVER be called directly.
25 *  They should be used as parameters to the ucnv_setFromUCallBack
26 *  and ucnv_setToUCallBack functions, to set the behaviour of a converter
27 *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
28 *
29 *  usage example:  'STOP' doesn't need any context, but newContext
30 *    could be set to something other than 'NULL' if needed. The available
31 *    contexts in this header can modify the default behavior of the callback.
32 *
33 *  \code
34 *  UErrorCode err = U_ZERO_ERROR;
35 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
36 *  const void *oldContext;
37 *  UConverterFromUCallback oldAction;
38 *
39 *
40 *  if (U_SUCCESS(err))
41 *  {
42 *      ucnv_setFromUCallBack(myConverter,
43 *                       UCNV_FROM_U_CALLBACK_STOP,
44 *                       NULL,
45 *                       &oldAction,
46 *                       &oldContext,
47 *                       &err);
48 *  }
49 *  \endcode
50 *
51 *  The code above tells "myConverter" to stop when it encounters an
52 *  ILLEGAL/TRUNCATED/INVALID sequence while it is used to convert from
53 *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
54 *  and ucnv_setToUCallBack would need to be called in order to change
55 *  that behavior too.
56 *
57 *  Here is an example with a context:
58 *
59 *  \code
60 *  UErrorCode err = U_ZERO_ERROR;
61 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
62 *  const void *oldContext;
63 *  UConverterToUCallback oldAction;
64 *
65 *
66 *  if (U_SUCCESS(err))
67 *  {
68 *      ucnv_setToUCallBack(myConverter,
69 *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
70 *                       UCNV_SUB_STOP_ON_ILLEGAL,
71 *                       &oldAction,
72 *                       &oldContext,
73 *                       &err);
74 *  }
75 *  \endcode
76 *
77 *  The code above tells "myConverter" to stop when it encounters an
78 *  ILLEGAL/TRUNCATED/INVALID sequence while it is used to convert from
79 *  Codepage -> Unicode. Any unmapped but legal characters will be
80 *  replaced with the default substitution character.
81 */
82
83#ifndef UCNV_ERR_H
84#define UCNV_ERR_H
85
86#include "unicode/utypes.h"
87
88#if !UCONFIG_NO_CONVERSION
89
90/** Forward declaration of the UConverter structure; this header only uses pointers to it. @stable ICU 2.0 */
91struct UConverter;
92
93/** Typedef so the converter type can be written as UConverter without the struct keyword. @stable ICU 2.0 */
94typedef struct UConverter UConverter;
95
96/**
97 * FROM_U, TO_U context option for the SUBSTITUTE callbacks: stop at an illegal sequence instead of substituting it
98 * @stable ICU 2.0
99 */
100#define UCNV_SUB_STOP_ON_ILLEGAL "i"
101
102/**
103 * FROM_U, TO_U context option for the SKIP callbacks: stop at an illegal sequence instead of skipping it
104 * @stable ICU 2.0
105 */
106#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
107
108/**
109 * FROM_U_CALLBACK_ESCAPE context option (a NULL context) to escape the code unit according to ICU (%UXXXX)
110 * @stable ICU 2.0
111 */
112#define UCNV_ESCAPE_ICU       NULL
113/**
114 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Java (\\uXXXX)
115 * @stable ICU 2.0
116 */
117#define UCNV_ESCAPE_JAVA      "J"
118/**
119 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
120 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\\xXXXX)
121 * @stable ICU 2.0
122 */
123#define UCNV_ESCAPE_C         "C"
124/**
125 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape (&amp;#DDDD;)
126 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape (&amp;#DDDD;)
127 * @stable ICU 2.0
128 */
129#define UCNV_ESCAPE_XML_DEC   "D"
130/**
131 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape (&amp;#xXXXX;)
132 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape (&amp;#xXXXX;)
133 * @stable ICU 2.0
134 */
135#define UCNV_ESCAPE_XML_HEX   "X"
136/**
137 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
138 * @stable ICU 2.0
139 */
140#define UCNV_ESCAPE_UNICODE   "U"
141
142/**
143 * The process condition code to be used with the callbacks.
144 * Codes which are greater than UCNV_IRREGULAR should be
145 * passed on to any chained callbacks.
146 * @stable ICU 2.0
147 */
148typedef enum {
149    UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
150                             The error code U_INVALID_CHAR_FOUND will be set. */
151    UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example,
152                             \\x81\\x2E is illegal in SJIS because \\x2E
153                             is not a valid trail byte for the \\x81
154                             lead byte.
155                             Also, starting with Unicode 3.0.1, non-shortest byte sequences
156                             in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
157                             are also illegal, not just irregular.
158                             The error code U_ILLEGAL_CHAR_FOUND will be set. */
159    UCNV_IRREGULAR = 2,   /**< The code point is not a regular sequence in
160                             the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
161                             are irregular UTF-8 byte sequences for single surrogate
162                             code points.
163                             The error code U_INVALID_CHAR_FOUND will be set. */
164    UCNV_RESET = 3,       /**< The callback is called with this reason when a
165                             'reset' has occurred. The callback should reset all
166                             state. */
167    UCNV_CLOSE = 4,        /**< Called when the converter is closed. The
168                             callback should release any allocated memory.*/
169    UCNV_CLONE = 5         /**< Called when ucnv_safeClone() is called on the
170                              converter. The pointer available as the
171                              'context' is an alias to the original converter's
172                              context pointer. If the context must be owned
173                              by the new converter, the callback must clone
174                              the data and call ucnv_setFromUCallBack
175                              (or ucnv_setToUCallBack) with the correct pointer.
176                              @stable ICU 2.2
177                           */
178} UConverterCallbackReason;
179
180
181/**
182 * The structure for the fromUnicode callback function parameter.
183 * @stable ICU 2.0
184 */
185typedef struct {
186    uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
187    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0    */
188    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0  */
189    const UChar *source;        /**< Pointer to the source buffer. @stable ICU 2.0    */
190    const UChar *sourceLimit;   /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
191    char *target;               /**< Pointer to the target buffer. @stable ICU 2.0    */
192    const char *targetLimit;    /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
193    int32_t *offsets;           /**< Pointer to the buffer that receives the offsets. *offsets = blah ; offsets++;. @stable ICU 2.0  */
194} UConverterFromUnicodeArgs;
195
196
197/**
198 * The structure for the toUnicode callback function parameter.
199 * @stable ICU 2.0
200 */
201typedef struct {
202    uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
203    UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0   */
204    UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
205    const char *source;         /**< Pointer to the source buffer. @stable ICU 2.0    */
206    const char *sourceLimit;    /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
207    UChar *target;              /**< Pointer to the target buffer. @stable ICU 2.0    */
208    const UChar *targetLimit;   /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
209    int32_t *offsets;           /**< Pointer to the buffer that receives the offsets. *offsets = blah ; offsets++;. @stable ICU 2.0  */
210} UConverterToUnicodeArgs;
211
212
213/**
214 * DO NOT CALL THIS FUNCTION DIRECTLY!
215 * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
216 * returning the error code to the caller immediately.
217 *
218 * @param context Pointer to the callback's private data ('STOP' doesn't need any context, so NULL may be used)
219 * @param fromUArgs Information about the conversion in progress
220 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
221 * @param length Size (in UChars) of the concerned Unicode sequence pointed to by codeUnits
222 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
223 * @param reason Defines the reason the callback was invoked
224 * @param err This should always be set to a failure status prior to calling.
225 * @stable ICU 2.0
226 */
227U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
228                  const void *context,
229                  UConverterFromUnicodeArgs *fromUArgs,
230                  const UChar* codeUnits,
231                  int32_t length,
232                  UChar32 codePoint,
233                  UConverterCallbackReason reason,
234                  UErrorCode * err);
235
236
237
238/**
239 * DO NOT CALL THIS FUNCTION DIRECTLY!
240 * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
241 * returning the error code to the caller immediately.
242 *
243 * @param context Pointer to the callback's private data ('STOP' doesn't need any context, so NULL may be used)
244 * @param toUArgs Information about the conversion in progress
245 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
246 * @param length Size (in bytes) of the concerned codepage sequence
247 * @param reason Defines the reason the callback was invoked
248 * @param err This should always be set to a failure status prior to calling.
249 * @stable ICU 2.0
250 */
251U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
252                  const void *context,
253                  UConverterToUnicodeArgs *toUArgs,
254                  const char* codeUnits,
255                  int32_t length,
256                  UConverterCallbackReason reason,
257                  UErrorCode * err);
258
259/**
260 * DO NOT CALL THIS FUNCTION DIRECTLY!
261 * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
262 * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
263 * simply ignoring those characters.
264 *
265 * @param context  The function currently recognizes the callback options:
266 *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
267 *                      returning the error code to the caller immediately.
268 *                 NULL: Skips any ILLEGAL_SEQUENCE
269 * @param fromUArgs Information about the conversion in progress
270 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
271 * @param length Size (in UChars) of the concerned Unicode sequence pointed to by codeUnits
272 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
273 * @param reason Defines the reason the callback was invoked
274 * @param err Return value will be set to success if the callback was handled,
275 *      otherwise this value will be set to a failure status.
276 * @stable ICU 2.0
277 */
278U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
279                  const void *context,
280                  UConverterFromUnicodeArgs *fromUArgs,
281                  const UChar* codeUnits,
282                  int32_t length,
283                  UChar32 codePoint,
284                  UConverterCallbackReason reason,
285                  UErrorCode * err);
286
287/**
288 * DO NOT CALL THIS FUNCTION DIRECTLY!
289 * This From Unicode callback will substitute the ILLEGAL_SEQUENCE, or only the
290 * UNASSIGNED_SEQUENCE depending on the context parameter, with the
291 * current substitution string for the converter. This is the default
292 * callback.
293 *
294 * @param context The function currently recognizes the callback options:
295 *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
296 *                      returning the error code to the caller immediately.
297 *                 NULL: Substitutes any ILLEGAL_SEQUENCE
298 * @param fromUArgs Information about the conversion in progress
299 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
300 * @param length Size (in UChars) of the concerned Unicode sequence pointed to by codeUnits
301 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
302 * @param reason Defines the reason the callback was invoked
303 * @param err Return value will be set to success if the callback was handled,
304 *      otherwise this value will be set to a failure status.
305 * @see ucnv_setSubstChars
306 * @stable ICU 2.0
307 */
308U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
309                  const void *context,
310                  UConverterFromUnicodeArgs *fromUArgs,
311                  const UChar* codeUnits,
312                  int32_t length,
313                  UChar32 codePoint,
314                  UConverterCallbackReason reason,
315                  UErrorCode * err);
316
317/**
318 * DO NOT CALL THIS FUNCTION DIRECTLY!
319 * This From Unicode callback will substitute the ILLEGAL SEQUENCE with the
320 * hexadecimal (or, for UCNV_ESCAPE_XML_DEC, decimal) representation of the illegal code points.
321 *
322 * @param context The function currently recognizes the callback options:
323 *        <ul>
324 *        <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
325 *          representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE").
326 *          In the event the converter doesn't support the characters {%,U}[A-F][0-9],
327 *          it will substitute the illegal sequence with the substitution characters.
328 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
329 *          %UD84D%UDC56</li>
330 *        <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
331 *          representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
332 *          In the event the converter doesn't support the characters {\,u}[A-F][0-9],
333 *          it will substitute the illegal sequence with the substitution characters.
334 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
335 *          \\uD84D\\uDC56</li>
336 *        <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
337 *          representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
338 *          In the event the converter doesn't support the characters {\,u,U}[A-F][0-9],
339 *          it will substitute the illegal sequence with the substitution characters.
340 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
341 *          \\U00023456</li>
342 *        <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
343 *          representation in the format &amp;#DDDDDDDD; (e.g. "&amp;#65534;&amp;#172;&amp;#51454;").
344 *          In the event the converter doesn't support the characters {&amp;,#}[0-9],
345 *          it will substitute the illegal sequence with the substitution characters.
346 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
347 *          &amp;#144470; and zero padding is ignored.</li>
348 *        <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
349 *          representation in the format &amp;#xXXXX; (e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;").
350 *          In the event the converter doesn't support the characters {&amp;,#,x}[0-9],
351 *          it will substitute the illegal sequence with the substitution characters.
352 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
353 *          &amp;#x23456;</li>
354 *        </ul>
355 * @param fromUArgs Information about the conversion in progress
356 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
357 * @param length Size (in UChars) of the concerned Unicode sequence pointed to by codeUnits
358 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
359 * @param reason Defines the reason the callback was invoked
360 * @param err Return value will be set to success if the callback was handled,
361 *      otherwise this value will be set to a failure status.
362 * @stable ICU 2.0
363 */
364U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
365                  const void *context,
366                  UConverterFromUnicodeArgs *fromUArgs,
367                  const UChar* codeUnits,
368                  int32_t length,
369                  UChar32 codePoint,
370                  UConverterCallbackReason reason,
371                  UErrorCode * err);
372
373
374/**
375 * DO NOT CALL THIS FUNCTION DIRECTLY!
376 * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
377 * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
378 * simply ignoring those characters.
379 *
380 * @param context  The function currently recognizes the callback options:
381 *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
382 *                      returning the error code to the caller immediately.
383 *                 NULL: Skips any ILLEGAL_SEQUENCE
384 * @param toUArgs Information about the conversion in progress
385 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
386 * @param length Size (in bytes) of the concerned codepage sequence
387 * @param reason Defines the reason the callback was invoked
388 * @param err Return value will be set to success if the callback was handled,
389 *      otherwise this value will be set to a failure status.
390 * @stable ICU 2.0
391 */
392U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
393                  const void *context,
394                  UConverterToUnicodeArgs *toUArgs,
395                  const char* codeUnits,
396                  int32_t length,
397                  UConverterCallbackReason reason,
398                  UErrorCode * err);
399
400/**
401 * DO NOT CALL THIS FUNCTION DIRECTLY!
402 * This To Unicode callback will substitute the ILLEGAL_SEQUENCE, or only the
403 * UNASSIGNED_SEQUENCE depending on the context parameter, with the
404 * Unicode substitution character, U+FFFD.
405 *
406 * @param context  The function currently recognizes the callback options:
407 *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
408 *                      returning the error code to the caller immediately.
409 *                 NULL: Substitutes any ILLEGAL_SEQUENCE
410 * @param toUArgs Information about the conversion in progress
411 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
412 * @param length Size (in bytes) of the concerned codepage sequence
413 * @param reason Defines the reason the callback was invoked
414 * @param err Return value will be set to success if the callback was handled,
415 *      otherwise this value will be set to a failure status.
416 * @stable ICU 2.0
417 */
418U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
419                  const void *context,
420                  UConverterToUnicodeArgs *toUArgs,
421                  const char* codeUnits,
422                  int32_t length,
423                  UConverterCallbackReason reason,
424                  UErrorCode * err);
425
426/**
427 * DO NOT CALL THIS FUNCTION DIRECTLY!
428 * This To Unicode callback will substitute the ILLEGAL SEQUENCE with an
429 * escaped representation of the illegal bytes
430 *  (e.g. hexadecimal in the format %XNN: "%XFF%X0A%XC8%X03" - presumably the UCNV_ESCAPE_ICU form; TODO confirm).
431 *
432 * @param context The function currently recognizes the callback options:
433 *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
434 *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE; see those macros for the escape format each one selects.
435 * @param toUArgs Information about the conversion in progress
436 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
437 * @param length Size (in bytes) of the concerned codepage sequence
438 * @param reason Defines the reason the callback was invoked
439 * @param err Return value will be set to success if the callback was handled,
440 *      otherwise this value will be set to a failure status.
441 * @stable ICU 2.0
442 */
443
444U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
445                  const void *context,
446                  UConverterToUnicodeArgs *toUArgs,
447                  const char* codeUnits,
448                  int32_t length,
449                  UConverterCallbackReason reason,
450                  UErrorCode * err);
451
452#endif
453
454#endif
455
456/*UCNV_ERR_H*/
457