1/*
2 *****************************************************************************
3 *
4 *   Copyright (C) 1998-2007, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *****************************************************************************
8 *
9 *  ucnv_err.c
10 *  Implements error behaviour functions called by T_UConverter_{from,to}Unicode
11 *
12 *
13*   Change history:
14*
15*   06/29/2000  helena      Major rewrite of the callback APIs.
16*/
17
18#include "unicode/utypes.h"
19
20#if !UCONFIG_NO_CONVERSION
21
22#include "unicode/ucnv_err.h"
23#include "unicode/ucnv_cb.h"
24#include "ucnv_cnv.h"
25#include "cmemory.h"
26#include "unicode/ucnv.h"
27#include "ustrfmt.h"
28
/*
 * Capacity, in UChars, of the scratch buffers that hold one escape sequence.
 * Magic # 48 = 6 (longest escape written per input byte, e.g. "&#xFF;" or
 * "&#255;") * 8 (max number of bytes per char for any converter).
 * The previous value of 32 only left room for 4 UChars per byte.
 */
#define VALUE_STRING_LENGTH 48

/* Code points of the ASCII characters used to build the escape sequences. */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025
#define UNICODE_U_CODEPOINT             0x0055
#define UNICODE_X_CODEPOINT             0x0058
#define UNICODE_RS_CODEPOINT            0x005C
#define UNICODE_U_LOW_CODEPOINT         0x0075
#define UNICODE_X_LOW_CODEPOINT         0x0078
#define UNICODE_AMP_CODEPOINT           0x0026
#define UNICODE_HASH_CODEPOINT          0x0023
#define UNICODE_SEMICOLON_CODEPOINT     0x003B
#define UNICODE_PLUS_CODEPOINT          0x002B
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D
#define UNICODE_SPACE_CODEPOINT         0x0020

/*
 * Option bytes: the callbacks in this file compare the first byte of their
 * context argument against these values to select a behaviour variant.
 */
#define UCNV_PRV_ESCAPE_ICU         0
#define UCNV_PRV_ESCAPE_C           'C'
#define UCNV_PRV_ESCAPE_XML_DEC     'D'
#define UCNV_PRV_ESCAPE_XML_HEX     'X'
#define UCNV_PRV_ESCAPE_JAVA        'J'
#define UCNV_PRV_ESCAPE_UNICODE     'U'
#define UCNV_PRV_ESCAPE_CSS2        'S'
#define UCNV_PRV_STOP_ON_ILLEGAL    'i'
52
53/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
54U_CAPI void    U_EXPORT2
55UCNV_FROM_U_CALLBACK_STOP (
56                  const void *context,
57                  UConverterFromUnicodeArgs *fromUArgs,
58                  const UChar* codeUnits,
59                  int32_t length,
60                  UChar32 codePoint,
61                  UConverterCallbackReason reason,
62                  UErrorCode * err)
63{
64    /* the caller must have set the error code accordingly */
65    return;
66}
67
68
69/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
70U_CAPI void    U_EXPORT2
71UCNV_TO_U_CALLBACK_STOP (
72                   const void *context,
73                   UConverterToUnicodeArgs *toUArgs,
74                   const char* codePoints,
75                   int32_t length,
76                   UConverterCallbackReason reason,
77                   UErrorCode * err)
78{
79    /* the caller must have set the error code accordingly */
80    return;
81}
82
83U_CAPI void    U_EXPORT2
84UCNV_FROM_U_CALLBACK_SKIP (
85                  const void *context,
86                  UConverterFromUnicodeArgs *fromUArgs,
87                  const UChar* codeUnits,
88                  int32_t length,
89                  UChar32 codePoint,
90                  UConverterCallbackReason reason,
91                  UErrorCode * err)
92{
93    if (reason <= UCNV_IRREGULAR)
94    {
95        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
96        {
97            *err = U_ZERO_ERROR;
98        }
99        /* else the caller must have set the error code accordingly. */
100    }
101    /* else ignore the reset, close and clone calls. */
102}
103
104U_CAPI void    U_EXPORT2
105UCNV_FROM_U_CALLBACK_SUBSTITUTE (
106                  const void *context,
107                  UConverterFromUnicodeArgs *fromArgs,
108                  const UChar* codeUnits,
109                  int32_t length,
110                  UChar32 codePoint,
111                  UConverterCallbackReason reason,
112                  UErrorCode * err)
113{
114    if (reason <= UCNV_IRREGULAR)
115    {
116        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
117        {
118            *err = U_ZERO_ERROR;
119            ucnv_cbFromUWriteSub(fromArgs, 0, err);
120        }
121        /* else the caller must have set the error code accordingly. */
122    }
123    /* else ignore the reset, close and clone calls. */
124}
125
126/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
127 *uses a clean copy (resetted) of the converter, to convert that unicode
128 *escape sequence to the target codepage (if conversion failure happens then
129 *we revert to substituting with subchar)
130 */
131U_CAPI void    U_EXPORT2
132UCNV_FROM_U_CALLBACK_ESCAPE (
133                         const void *context,
134                         UConverterFromUnicodeArgs *fromArgs,
135                         const UChar *codeUnits,
136                         int32_t length,
137                         UChar32 codePoint,
138                         UConverterCallbackReason reason,
139                         UErrorCode * err)
140{
141
142  UChar valueString[VALUE_STRING_LENGTH];
143  int32_t valueStringLength = 0;
144  int32_t i = 0;
145
146  const UChar *myValueSource = NULL;
147  UErrorCode err2 = U_ZERO_ERROR;
148  UConverterFromUCallback original = NULL;
149  const void *originalContext;
150
151  UConverterFromUCallback ignoredCallback = NULL;
152  const void *ignoredContext;
153
154  if (reason > UCNV_IRREGULAR)
155  {
156      return;
157  }
158
159  ucnv_setFromUCallBack (fromArgs->converter,
160                     (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
161                     NULL,
162                     &original,
163                     &originalContext,
164                     &err2);
165
166  if (U_FAILURE (err2))
167  {
168    *err = err2;
169    return;
170  }
171  if(context==NULL)
172  {
173      while (i < length)
174      {
175        valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
176        valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
177        valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
178      }
179  }
180  else
181  {
182      switch(*((char*)context))
183      {
184      case UCNV_PRV_ESCAPE_JAVA:
185          while (i < length)
186          {
187              valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
188              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
189              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
190          }
191          break;
192
193      case UCNV_PRV_ESCAPE_C:
194          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
195
196          if(length==2){
197              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
198              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
199
200          }
201          else{
202              valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
203              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
204          }
205          break;
206
207      case UCNV_PRV_ESCAPE_XML_DEC:
208
209          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
210          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
211          if(length==2){
212              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
213          }
214          else{
215              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
216          }
217          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
218          break;
219
220      case UCNV_PRV_ESCAPE_XML_HEX:
221
222          valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
223          valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
224          valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
225          if(length==2){
226              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
227          }
228          else{
229              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
230          }
231          valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
232          break;
233
234      case UCNV_PRV_ESCAPE_UNICODE:
235          valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT;    /* adding { */
236          valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;    /* adding U */
237          valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
238          if (length == 2) {
239              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
240          } else {
241              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
242          }
243          valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT;    /* adding } */
244          break;
245
246      case UCNV_PRV_ESCAPE_CSS2:
247          valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
248          valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
249          /* Always add space character, becase the next character might be whitespace,
250             which would erroneously be considered the termination of the escape sequence. */
251          valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
252          break;
253
254      default:
255          while (i < length)
256          {
257              valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
258              valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;             /* adding U */
259              valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
260          }
261      }
262  }
263  myValueSource = valueString;
264
265  /* reset the error */
266  *err = U_ZERO_ERROR;
267
268  ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
269
270  ucnv_setFromUCallBack (fromArgs->converter,
271                         original,
272                         originalContext,
273                         &ignoredCallback,
274                         &ignoredContext,
275                         &err2);
276  if (U_FAILURE (err2))
277  {
278      *err = err2;
279      return;
280  }
281
282  return;
283}
284
285
286
287U_CAPI void  U_EXPORT2
288UCNV_TO_U_CALLBACK_SKIP (
289                 const void *context,
290                 UConverterToUnicodeArgs *toArgs,
291                 const char* codeUnits,
292                 int32_t length,
293                 UConverterCallbackReason reason,
294                 UErrorCode * err)
295{
296    if (reason <= UCNV_IRREGULAR)
297    {
298        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
299        {
300            *err = U_ZERO_ERROR;
301        }
302        /* else the caller must have set the error code accordingly. */
303    }
304    /* else ignore the reset, close and clone calls. */
305}
306
307U_CAPI void    U_EXPORT2
308UCNV_TO_U_CALLBACK_SUBSTITUTE (
309                 const void *context,
310                 UConverterToUnicodeArgs *toArgs,
311                 const char* codeUnits,
312                 int32_t length,
313                 UConverterCallbackReason reason,
314                 UErrorCode * err)
315{
316    if (reason <= UCNV_IRREGULAR)
317    {
318        if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
319        {
320            *err = U_ZERO_ERROR;
321            ucnv_cbToUWriteSub(toArgs,0,err);
322        }
323        /* else the caller must have set the error code accordingly. */
324    }
325    /* else ignore the reset, close and clone calls. */
326}
327
328/*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
329 *and uses that as the substitution sequence
330 */
331U_CAPI void   U_EXPORT2
332UCNV_TO_U_CALLBACK_ESCAPE (
333                 const void *context,
334                 UConverterToUnicodeArgs *toArgs,
335                 const char* codeUnits,
336                 int32_t length,
337                 UConverterCallbackReason reason,
338                 UErrorCode * err)
339{
340    UChar uniValueString[VALUE_STRING_LENGTH];
341    int32_t valueStringLength = 0;
342    int32_t i = 0;
343
344    if (reason > UCNV_IRREGULAR)
345    {
346        return;
347    }
348
349    if(context==NULL)
350    {
351        while (i < length)
352        {
353            uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
354            uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
355            valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
356        }
357    }
358    else
359    {
360        switch(*((char*)context))
361        {
362        case UCNV_PRV_ESCAPE_XML_DEC:
363            while (i < length)
364            {
365                uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
366                uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
367                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
368                uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
369            }
370            break;
371
372        case UCNV_PRV_ESCAPE_XML_HEX:
373            while (i < length)
374            {
375                uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
376                uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
377                uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
378                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
379                uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
380            }
381            break;
382        case UCNV_PRV_ESCAPE_C:
383            while (i < length)
384            {
385                uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
386                uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
387                valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
388            }
389            break;
390        default:
391            while (i < length)
392            {
393                uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
394                uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
395                uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
396                valueStringLength += 2;
397            }
398        }
399    }
400    /* reset the error */
401    *err = U_ZERO_ERROR;
402
403    ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
404}
405
406#endif
407