1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 427f654740f2a26ad62a5c155af9199af9e69b889claireho* Copyright (C) 1999-2010, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: unistr_cnv.cpp 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:2 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2004aug19 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Character conversion functions moved here from unistr.cpp 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/putil.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 2985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "ucnv_imp.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "putilimp.h" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ustr_cnv.h" 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ustr_imp.h" 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//======================================== 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Constructors 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//======================================== 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 4085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#if !U_CHARSET_IS_UTF8 4185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 4285bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoUnicodeString::UnicodeString(const char *codepageData) 4385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho : fShortLength(0), 4485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fFlags(kShortString) 4585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho{ 4685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(codepageData != 0) { 4785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0); 4885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 4985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 5085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 5185bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoUnicodeString::UnicodeString(const char *codepageData, 5285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t dataLength) 5385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho : fShortLength(0), 5485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fFlags(kShortString) 5585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho{ 5685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(codepageData != 0) { 5785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho doCodepageCreate(codepageData, dataLength, 0); 5885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 5985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 6085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 6185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho// else see unistr.cpp 6285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 6385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString::UnicodeString(const char *codepageData, 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *codepage) 6685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho : fShortLength(0), 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFlags(kShortString) 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(codepageData != 0) { 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage); 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString::UnicodeString(const char *codepageData, 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t dataLength, 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *codepage) 7785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho : fShortLength(0), 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFlags(kShortString) 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(codepageData != 0) { 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru doCodepageCreate(codepageData, dataLength, codepage); 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString::UnicodeString(const char *src, int32_t srcLength, 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv, 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &errorCode) 8885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho : fShortLength(0), 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fFlags(kShortString) 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // check arguments 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(src==NULL) { 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // treat as an empty string, do nothing more 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(srcLength<-1) { 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ILLEGAL_ARGUMENT_ERROR; 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // get input length 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(srcLength==-1) { 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru srcLength=(int32_t)uprv_strlen(src); 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(srcLength>0) { 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv!=0) { 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // use the provided converter 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_resetToUnicode(cnv); 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru doCodepageCreate(src, srcLength, cnv, errorCode); 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // use the default converter 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=u_getDefaultConverter(&errorCode); 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru doCodepageCreate(src, srcLength, cnv, errorCode); 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_releaseDefaultConverter(cnv); 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setToBogus(); 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//======================================== 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Codeset conversion 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//======================================== 12585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 12685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#if !U_CHARSET_IS_UTF8 12785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 12885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoint32_t 12985bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoUnicodeString::extract(int32_t start, 13085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t length, 13185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho char *target, 13285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t dstSize) const { 13385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return extract(start, length, target, dstSize, 0); 13485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 13585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 13685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho// else see unistr.cpp 13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 13885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString::extract(int32_t start, 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length, 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *target, 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t dstSize, 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *codepage) const 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if the arguments are illegal, then do nothing 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // pin the indices to legal values 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pinIndices(start, length); 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // We need to cast dstSize to int32_t for all subsequent code. 15585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // I don't know why the API was defined with uint32_t but we are stuck with it. 15685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize 15785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // as a limit in some functions, it may wrap around and yield a pointer 15885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // that compares less-than target. 15985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t capacity; 16085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(dstSize < 0x7fffffff) { 16185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // Assume that the capacity is real and a limit pointer won't wrap around. 16285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho capacity = (int32_t)dstSize; 16385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 16427f654740f2a26ad62a5c155af9199af9e69b889claireho // Pin the capacity so that a limit pointer does not wrap around. 16527f654740f2a26ad62a5c155af9199af9e69b889claireho char *targetLimit = (char *)U_MAX_PTR(target); 16627f654740f2a26ad62a5c155af9199af9e69b889claireho // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff 16727f654740f2a26ad62a5c155af9199af9e69b889claireho // greater than target and does not wrap around the top of the address space. 16827f654740f2a26ad62a5c155af9199af9e69b889claireho capacity = (int32_t)(targetLimit - target); 16985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 17085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // create the converter 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *converter; 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // just write the NUL if the string length is 0 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length == 0) { 17785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return u_terminateChars(target, capacity, 0, &status); 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if the codepage is the default, use our cache 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if it is an empty string, then use the "invariant character" conversion 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (codepage == 0) { 18385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char *defaultName = ucnv_getDefaultName(); 18485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_FAST_IS_UTF8(defaultName)) { 18585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return toUTF8(start, length, target, capacity); 18685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter = u_getDefaultConverter(&status); 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (*codepage == 0) { 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // use the "invariant characters" conversion 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t destLength; 19185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(length <= capacity) { 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destLength = length; 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 19485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho destLength = capacity; 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_UCharsToChars(getArrayStart() + start, target, destLength); 19785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return u_terminateChars(target, capacity, length, &status); 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter = ucnv_open(codepage, &status); 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho length = doExtract(start, length, target, capacity, converter, status); 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // close the converter 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (codepage == 0) { 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_releaseDefaultConverter(converter); 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(converter); 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return length; 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString::extract(char *dest, int32_t destCapacity, 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv, 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &errorCode) const 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ILLEGAL_ARGUMENT_ERROR; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // nothing to do? 22985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(isEmpty()) { 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return u_terminateChars(dest, destCapacity, 0, &errorCode); 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // get the converter 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isDefaultConverter; 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv==0) { 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isDefaultConverter=TRUE; 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=u_getDefaultConverter(&errorCode); 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isDefaultConverter=FALSE; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_resetFromUnicode(cnv); 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // convert 24785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode); 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // release the converter 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(isDefaultConverter) { 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_releaseDefaultConverter(cnv); 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return len; 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString::doExtract(int32_t start, int32_t length, 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *dest, int32_t destCapacity, 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv, 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &errorCode) const 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(destCapacity!=0) { 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *dest=0; 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const UChar *src=getArrayStart()+start, *srcLimit=src+length; 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *originalDest=dest; 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *destLimit; 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(destCapacity==0) { 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destLimit=dest=0; 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(destCapacity==-1) { 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used. 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destLimit=(char*)U_MAX_PTR(dest); 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // for NUL-termination, translate into highest int32_t 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destCapacity=0x7fffffff; 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destLimit=dest+destCapacity; 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // perform the conversion 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)(dest-originalDest); 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if an overflow occurs, then get the preflighting length 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char buffer[1024]; 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destLimit=buffer+sizeof(buffer); 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dest=buffer; 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length+=(int32_t)(dest-buffer); 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(errorCode==U_BUFFER_OVERFLOW_ERROR); 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return u_terminateChars(originalDest, destCapacity, length, &errorCode); 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString::doCodepageCreate(const char *codepageData, 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t dataLength, 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *codepage) 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if there's nothing to convert, do nothing 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(codepageData == 0 || dataLength == 0 || dataLength < -1) { 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(dataLength == -1) { 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dataLength = (int32_t)uprv_strlen(codepageData); 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // create the converter 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if the codepage is the default, use our cache 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if it is an empty string, then use the "invariant character" conversion 32385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverter *converter; 32485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (codepage == 0) { 32585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char *defaultName = ucnv_getDefaultName(); 32685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_FAST_IS_UTF8(defaultName)) { 32785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setToUTF8(StringPiece(codepageData, dataLength)); 32885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 32985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 33085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho converter = u_getDefaultConverter(&status); 33185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(*codepage == 0) { 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // use the "invariant characters" conversion 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) { 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_charsToUChars(codepageData, getArrayStart(), dataLength); 33585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setLength(dataLength); 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setToBogus(); 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 34085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 34185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho converter = ucnv_open(codepage, &status); 34285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 34385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 34485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // if we failed, set the appropriate flags and return 34585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(status)) { 34685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setToBogus(); 34785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // perform the conversion 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru doCodepageCreate(codepageData, dataLength, converter, status); 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(status)) { 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setToBogus(); 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // close the converter 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(codepage == 0) { 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_releaseDefaultConverter(converter); 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(converter); 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString::doCodepageCreate(const char *codepageData, 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t dataLength, 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *converter, 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &status) 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(status)) { 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // set up the conversion parameters 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySource = codepageData; 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySourceEnd = mySource + dataLength; 37785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar *array, *myTarget; 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // estimate the size needed: 38085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t arraySize; 38185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(dataLength <= US_STACKBUF_SIZE) { 38285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // try to use the stack buffer 38385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho arraySize = US_STACKBUF_SIZE; 38485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 38585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // 1.25 UChar's per source byte should cover most cases 38685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho arraySize = dataLength + (dataLength >> 2); 38785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // we do not care about the current contents 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool doCopyArray = FALSE; 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) { 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setToBogus(); 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // perform the conversion 39885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho array = getArrayStart(); 39985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myTarget = array + length(); 40085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_toUnicode(converter, &myTarget, array + getCapacity(), 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &mySource, mySourceEnd, 0, TRUE, &status); 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // update the conversion parameters 40485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setLength((int32_t)(myTarget - array)); 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // allocate more space and copy data, if needed 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(status == U_BUFFER_OVERFLOW_ERROR) { 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // reset the error code 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_ZERO_ERROR; 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // keep the previous conversion results 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru doCopyArray = TRUE; 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // estimate the new size needed, larger than before 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // try 2 UChar's per remaining source byte 41685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource)); 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 426