1/*
2**********************************************************************
3*   Copyright (C) 2000-2006, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6 *  ucnv_cb.c:
7 *  External APIs for the ICU's codeset conversion library
8 *  Helena Shih
9 *
10 * Modification History:
11 *
12 *   Date        Name        Description
13 *   7/28/2000   srl         Implementation
14 */
15
16/**
17 * @name Character Conversion C API
18 *
19 */
20
21#include "unicode/utypes.h"
22
23#if !UCONFIG_NO_CONVERSION
24
25#include "unicode/ucnv_cb.h"
26#include "ucnv_bld.h"
27#include "ucnv_cnv.h"
28#include "cmemory.h"
29
/* The offsets need to be updated whenever the target moves. */
/* Note: Recursion may occur in the callback functions. If you do not use the
ucnv_cbXXX functions, be sure to update the offsets correctly yourself. If things
get complex, make sure the same callback is not used again within the same call stack. */
34U_CAPI void  U_EXPORT2
35ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
36                       const char* source,
37                       int32_t length,
38                       int32_t offsetIndex,
39                       UErrorCode * err)
40{
41    if(U_FAILURE(*err)) {
42        return;
43    }
44
45    ucnv_fromUWriteBytes(
46        args->converter,
47        source, length,
48        &args->target, args->targetLimit,
49        &args->offsets, offsetIndex,
50        err);
51}
52
53U_CAPI void  U_EXPORT2
54ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
55                             const UChar** source,
56                             const UChar*  sourceLimit,
57                             int32_t offsetIndex,
58                             UErrorCode * err)
59{
60    /*
61    This is a fun one.  Recursion can occur - we're basically going to
62    just retry shoving data through the same converter. Note, if you got
63    here through some kind of invalid sequence, you maybe should emit a
64    reset sequence of some kind and/or call ucnv_reset().  Since this
65    IS an actual conversion, take care that you've changed the callback
66    or the data, or you'll get an infinite loop.
67
68    Please set the err value to something reasonable before calling
69    into this.
70    */
71
72    char *oldTarget;
73
74    if(U_FAILURE(*err))
75    {
76        return;
77    }
78
79    oldTarget = args->target;
80
81    ucnv_fromUnicode(args->converter,
82        &args->target,
83        args->targetLimit,
84        source,
85        sourceLimit,
86        NULL, /* no offsets */
87        FALSE, /* no flush */
88        err);
89
90    if(args->offsets)
91    {
92        while (args->target != oldTarget)  /* if it moved at all.. */
93        {
94            *(args->offsets)++ = offsetIndex;
95            oldTarget++;
96        }
97    }
98
99    /*
100    Note, if you did something like used a Stop subcallback, things would get interesting.
101    In fact, here's where we want to return the partially consumed in-source!
102    */
103    if(*err == U_BUFFER_OVERFLOW_ERROR)
104    /* && (*source < sourceLimit && args->target >= args->targetLimit)
105    -- S. Hrcek */
106    {
107        /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
108        It's a fixed size. If we overflow it... Hmm */
109        char *newTarget;
110        const char *newTargetLimit;
111        UErrorCode err2 = U_ZERO_ERROR;
112
113        int8_t errBuffLen;
114
115        errBuffLen  = args->converter->charErrorBufferLength;
116
117        /* start the new target at the first free slot in the errbuff.. */
118        newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
119
120        newTargetLimit = (char *)(args->converter->charErrorBuffer +
121            sizeof(args->converter->charErrorBuffer));
122
123        if(newTarget >= newTargetLimit)
124        {
125            *err = U_INTERNAL_PROGRAM_ERROR;
126            return;
127        }
128
129        /* We're going to tell the converter that the errbuff len is empty.
130        This prevents the existing errbuff from being 'flushed' out onto
131        itself.  If the errbuff is needed by the converter this time,
132        we're hosed - we're out of space! */
133
134        args->converter->charErrorBufferLength = 0;
135
136        ucnv_fromUnicode(args->converter,
137                         &newTarget,
138                         newTargetLimit,
139                         source,
140                         sourceLimit,
141                         NULL,
142                         FALSE,
143                         &err2);
144
145        /* We can go ahead and overwrite the  length here. We know just how
146        to recalculate it. */
147
148        args->converter->charErrorBufferLength = (int8_t)(
149            newTarget - (char*)args->converter->charErrorBuffer);
150
151        if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
152        {
153            /* now we're REALLY in trouble.
154            Internal program error - callback shouldn't have written this much
155            data!
156            */
157            *err = U_INTERNAL_PROGRAM_ERROR;
158            return;
159        }
160        /*else {*/
161            /* sub errs could be invalid/truncated/illegal chars or w/e.
162            These might want to be passed on up.. But the problem is, we already
163            need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
164            other errs.. */
165
166            /*
167            if(U_FAILURE(err2))
168            ??
169            */
170        /*}*/
171    }
172}
173
174U_CAPI void  U_EXPORT2
175ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
176                           int32_t offsetIndex,
177                           UErrorCode * err)
178{
179    UConverter *converter;
180    int32_t length;
181
182    if(U_FAILURE(*err)) {
183        return;
184    }
185    converter = args->converter;
186    length = converter->subCharLen;
187
188    if(length == 0) {
189        return;
190    }
191
192    if(length < 0) {
193        /*
194         * Write/convert the substitution string. Its real length is -length.
195         * Unlike the escape callback, we need not change the converter's
196         * callback function because ucnv_setSubstString() verified that
197         * the string can be converted, so we will not get a conversion error
198         * and will not recurse.
199         * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
200         */
201        const UChar *source = (const UChar *)converter->subChars;
202        ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
203        return;
204    }
205
206    if(converter->sharedData->impl->writeSub!=NULL) {
207        converter->sharedData->impl->writeSub(args, offsetIndex, err);
208    }
209    else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
210        /*
211        TODO: Is this untestable because the MBCS converter has a writeSub function to call
212        and the other converters don't use subChar1?
213        */
214        ucnv_cbFromUWriteBytes(args,
215                               (const char *)&converter->subChar1, 1,
216                               offsetIndex, err);
217    }
218    else {
219        ucnv_cbFromUWriteBytes(args,
220                               (const char *)converter->subChars, length,
221                               offsetIndex, err);
222    }
223}
224
225U_CAPI void  U_EXPORT2
226ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
227                            const UChar* source,
228                            int32_t length,
229                            int32_t offsetIndex,
230                            UErrorCode * err)
231{
232    if(U_FAILURE(*err)) {
233        return;
234    }
235
236    ucnv_toUWriteUChars(
237        args->converter,
238        source, length,
239        &args->target, args->targetLimit,
240        &args->offsets, offsetIndex,
241        err);
242}
243
244U_CAPI void  U_EXPORT2
245ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
246                         int32_t offsetIndex,
247                       UErrorCode * err)
248{
249    static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
250
251    /* could optimize this case, just one uchar */
252    if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
253        ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
254    } else {
255        ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
256    }
257}
258
259#endif
260