1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*******************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Copyright (C) 1999-2010, International Business Machines
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Corporation and others.  All Rights Reserved.
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*******************************************************************************
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   file name:  unistr_cnv.cpp
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   encoding:   US-ASCII
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   tab size:   8 (not used)
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   indentation:2
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created on: 2004aug19
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created by: Markus W. Scherer
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Character conversion functions moved here from unistr.cpp
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_CONVERSION
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/putil.h"
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cstring.h"
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h"
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ustring.h"
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unistr.h"
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv.h"
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_imp.h"
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "putilimp.h"
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ustr_cnv.h"
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ustr_imp.h"
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//========================================
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Constructors
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//========================================
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !U_CHARSET_IS_UTF8
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::UnicodeString(const char *codepageData)
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  : fShortLength(0),
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fFlags(kShortString)
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(codepageData != 0) {
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0);
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::UnicodeString(const char *codepageData,
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                             int32_t dataLength)
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  : fShortLength(0),
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fFlags(kShortString)
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(codepageData != 0) {
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        doCodepageCreate(codepageData, dataLength, 0);
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// else see unistr.cpp
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::UnicodeString(const char *codepageData,
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                             const char *codepage)
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  : fShortLength(0),
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fFlags(kShortString)
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(codepageData != 0) {
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::UnicodeString(const char *codepageData,
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                             int32_t dataLength,
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                             const char *codepage)
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  : fShortLength(0),
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fFlags(kShortString)
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(codepageData != 0) {
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        doCodepageCreate(codepageData, dataLength, codepage);
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::UnicodeString(const char *src, int32_t srcLength,
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                             UConverter *cnv,
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                             UErrorCode &errorCode)
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  : fShortLength(0),
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fFlags(kShortString)
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_SUCCESS(errorCode)) {
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // check arguments
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(src==NULL) {
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // treat as an empty string, do nothing more
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else if(srcLength<-1) {
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // get input length
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(srcLength==-1) {
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                srcLength=(int32_t)uprv_strlen(src);
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(srcLength>0) {
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(cnv!=0) {
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // use the provided converter
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    ucnv_resetToUnicode(cnv);
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    doCodepageCreate(src, srcLength, cnv, errorCode);
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else {
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // use the default converter
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    cnv=u_getDefaultConverter(&errorCode);
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    doCodepageCreate(src, srcLength, cnv, errorCode);
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    u_releaseDefaultConverter(cnv);
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(U_FAILURE(errorCode)) {
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            setToBogus();
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//========================================
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Codeset conversion
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//========================================
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !U_CHARSET_IS_UTF8
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::extract(int32_t start,
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       int32_t length,
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       char *target,
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       uint32_t dstSize) const {
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return extract(start, length, target, dstSize, 0);
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// else see unistr.cpp
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::extract(int32_t start,
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       int32_t length,
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       char *target,
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       uint32_t dstSize,
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       const char *codepage) const
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if the arguments are illegal, then do nothing
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // pin the indices to legal values
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pinIndices(start, length);
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // We need to cast dstSize to int32_t for all subsequent code.
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // I don't know why the API was defined with uint32_t but we are stuck with it.
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // as a limit in some functions, it may wrap around and yield a pointer
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // that compares less-than target.
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t capacity;
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(dstSize < 0x7fffffff) {
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Assume that the capacity is real and a limit pointer won't wrap around.
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        capacity = (int32_t)dstSize;
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Pin the capacity so that a limit pointer does not wrap around.
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        char *targetLimit = (char *)U_MAX_PTR(target);
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // greater than target and does not wrap around the top of the address space.
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        capacity = (int32_t)(targetLimit - target);
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // create the converter
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UConverter *converter;
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // just write the NUL if the string length is 0
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length == 0) {
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return u_terminateChars(target, capacity, 0, &status);
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if the codepage is the default, use our cache
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if it is an empty string, then use the "invariant character" conversion
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (codepage == 0) {
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const char *defaultName = ucnv_getDefaultName();
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(UCNV_FAST_IS_UTF8(defaultName)) {
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return toUTF8(start, length, target, capacity);
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        converter = u_getDefaultConverter(&status);
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else if (*codepage == 0) {
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // use the "invariant characters" conversion
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t destLength;
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(length <= capacity) {
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            destLength = length;
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            destLength = capacity;
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        u_UCharsToChars(getArrayStart() + start, target, destLength);
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return u_terminateChars(target, capacity, length, &status);
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        converter = ucnv_open(codepage, &status);
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    length = doExtract(start, length, target, capacity, converter, status);
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // close the converter
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (codepage == 0) {
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        u_releaseDefaultConverter(converter);
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ucnv_close(converter);
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return length;
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::extract(char *dest, int32_t destCapacity,
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UConverter *cnv,
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UErrorCode &errorCode) const
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(errorCode)) {
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // nothing to do?
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(isEmpty()) {
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return u_terminateChars(dest, destCapacity, 0, &errorCode);
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // get the converter
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool isDefaultConverter;
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(cnv==0) {
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        isDefaultConverter=TRUE;
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        cnv=u_getDefaultConverter(&errorCode);
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(U_FAILURE(errorCode)) {
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return 0;
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        isDefaultConverter=FALSE;
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ucnv_resetFromUnicode(cnv);
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // convert
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode);
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // release the converter
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(isDefaultConverter) {
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        u_releaseDefaultConverter(cnv);
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return len;
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::doExtract(int32_t start, int32_t length,
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         char *dest, int32_t destCapacity,
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         UConverter *cnv,
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         UErrorCode &errorCode) const
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(errorCode)) {
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(destCapacity!=0) {
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *dest=0;
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UChar *src=getArrayStart()+start, *srcLimit=src+length;
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *originalDest=dest;
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *destLimit;
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(destCapacity==0) {
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        destLimit=dest=0;
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else if(destCapacity==-1) {
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        destLimit=(char*)U_MAX_PTR(dest);
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // for NUL-termination, translate into highest int32_t
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        destCapacity=0x7fffffff;
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        destLimit=dest+destCapacity;
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // perform the conversion
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    length=(int32_t)(dest-originalDest);
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if an overflow occurs, then get the preflighting length
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        char buffer[1024];
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        destLimit=buffer+sizeof(buffer);
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        do {
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            dest=buffer;
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errorCode=U_ZERO_ERROR;
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            length+=(int32_t)(dest-buffer);
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return u_terminateChars(originalDest, destCapacity, length, &errorCode);
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::doCodepageCreate(const char *codepageData,
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                int32_t dataLength,
308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                const char *codepage)
309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if there's nothing to convert, do nothing
311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(dataLength == -1) {
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataLength = (int32_t)uprv_strlen(codepageData);
316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // create the converter
321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if the codepage is the default, use our cache
322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if it is an empty string, then use the "invariant character" conversion
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UConverter *converter;
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (codepage == 0) {
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const char *defaultName = ucnv_getDefaultName();
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(UCNV_FAST_IS_UTF8(defaultName)) {
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            setToUTF8(StringPiece(codepageData, dataLength));
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        converter = u_getDefaultConverter(&status);
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else if(*codepage == 0) {
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // use the "invariant characters" conversion
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            u_charsToUChars(codepageData, getArrayStart(), dataLength);
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            setLength(dataLength);
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            setToBogus();
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        converter = ucnv_open(codepage, &status);
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we failed, set the appropriate flags and return
345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(status)) {
346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        setToBogus();
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // perform the conversion
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doCodepageCreate(codepageData, dataLength, converter, status);
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(status)) {
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        setToBogus();
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // close the converter
357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(codepage == 0) {
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        u_releaseDefaultConverter(converter);
359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ucnv_close(converter);
361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString::doCodepageCreate(const char *codepageData,
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                int32_t dataLength,
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                UConverter *converter,
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                UErrorCode &status)
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(status)) {
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // set up the conversion parameters
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *mySource     = codepageData;
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *mySourceEnd  = mySource + dataLength;
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar *array, *myTarget;
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // estimate the size needed:
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t arraySize;
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(dataLength <= US_STACKBUF_SIZE) {
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // try to use the stack buffer
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        arraySize = US_STACKBUF_SIZE;
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // 1.25 UChar's per source byte should cover most cases
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        arraySize = dataLength + (dataLength >> 2);
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // we do not care about the current contents
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool doCopyArray = FALSE;
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(;;) {
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            setToBogus();
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // perform the conversion
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        array = getArrayStart();
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        myTarget = array + length();
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ucnv_toUnicode(converter, &myTarget,  array + getCapacity(),
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            &mySource, mySourceEnd, 0, TRUE, &status);
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // update the conversion parameters
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        setLength((int32_t)(myTarget - array));
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // allocate more space and copy data, if needed
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(status == U_BUFFER_OVERFLOW_ERROR) {
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // reset the error code
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status = U_ZERO_ERROR;
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // keep the previous conversion results
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            doCopyArray = TRUE;
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // estimate the new size needed, larger than before
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // try 2 UChar's per remaining source byte
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
426