1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Copyright (C) 1999-2004, International Business Machines
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Corporation and others.  All Rights Reserved.
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   uconv_cnv.c:
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Implements all the low level conversion functions
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   T_UnicodeConverter_{to,from}Unicode_$ConversionType
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Change history:
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   06/29/2000  helena      Major rewrite of the callback APIs.
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_CONVERSION
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv_err.h"
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv.h"
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uset.h"
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_cnv.h"
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_bld.h"
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h"
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_getCompleteUnicodeSet(const UConverter *cnv,
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                   const USetAdder *sa,
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                   UConverterUnicodeSet which,
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                   UErrorCode *pErrorCode) {
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    sa->addRange(sa->set, 0, 0x10ffff);
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                               const USetAdder *sa,
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                               UConverterUnicodeSet which,
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                               UErrorCode *pErrorCode) {
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    sa->addRange(sa->set, 0, 0xd7ff);
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    sa->addRange(sa->set, 0xe000, 0x10ffff);
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_fromUWriteBytes(UConverter *cnv,
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                     const char *bytes, int32_t length,
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                     char **target, const char *targetLimit,
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                     int32_t **offsets,
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                     int32_t sourceIndex,
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                     UErrorCode *pErrorCode) {
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *t=*target;
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t *o;
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* write bytes */
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(offsets==NULL || (o=*offsets)==NULL) {
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while(length>0 && t<targetLimit) {
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *t++=*bytes++;
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            --length;
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* output with offsets */
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while(length>0 && t<targetLimit) {
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *t++=*bytes++;
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *o++=sourceIndex;
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            --length;
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *offsets=o;
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *target=t;
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* write overflow */
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length>0) {
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(cnv!=NULL) {
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            t=(char *)cnv->charErrorBuffer;
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            cnv->charErrorBufferLength=(int8_t)length;
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            do {
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                *t++=(uint8_t)*bytes++;
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } while(--length>0);
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_toUWriteUChars(UConverter *cnv,
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    const UChar *uchars, int32_t length,
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UChar **target, const UChar *targetLimit,
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int32_t **offsets,
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int32_t sourceIndex,
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UErrorCode *pErrorCode) {
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *t=*target;
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t *o;
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* write UChars */
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(offsets==NULL || (o=*offsets)==NULL) {
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while(length>0 && t<targetLimit) {
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *t++=*uchars++;
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            --length;
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* output with offsets */
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while(length>0 && t<targetLimit) {
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *t++=*uchars++;
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *o++=sourceIndex;
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            --length;
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *offsets=o;
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *target=t;
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* write overflow */
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length>0) {
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(cnv!=NULL) {
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            t=cnv->UCharErrorBuffer;
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            cnv->UCharErrorBufferLength=(int8_t)length;
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            do {
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                *t++=*uchars++;
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } while(--length>0);
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_toUWriteCodePoint(UConverter *cnv,
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UChar32 c,
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UChar **target, const UChar *targetLimit,
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       int32_t **offsets,
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       int32_t sourceIndex,
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UErrorCode *pErrorCode) {
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *t;
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t *o;
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    t=*target;
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(t<targetLimit) {
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(c<=0xffff) {
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *t++=(UChar)c;
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c=U_SENTINEL;
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else /* c is a supplementary code point */ {
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *t++=U16_LEAD(c);
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c=U16_TRAIL(c);
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(t<targetLimit) {
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                *t++=(UChar)c;
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c=U_SENTINEL;
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* write offsets */
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(offsets!=NULL && (o=*offsets)!=NULL) {
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *o++=sourceIndex;
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if((*target+1)<t) {
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                *o++=sourceIndex;
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *offsets=o;
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *target=t;
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* write overflow from c */
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(c>=0) {
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(cnv!=NULL) {
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int8_t i=0;
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U16_APPEND_UNSAFE(cnv->UCharErrorBuffer, i, c);
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            cnv->UCharErrorBufferLength=i;
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
175