1/**
2*******************************************************************************
3* Copyright (C) 1996-2006, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                *
5*******************************************************************************
6*
7*
8*******************************************************************************
9*/
10/*
11 * (C) Copyright IBM Corp. 2000 - All Rights Reserved
12 *  A JNI wrapper to ICU native converter Interface
13 * @author: Ram Viswanadha
14 */
15
16#define LOG_TAG "NativeConverter"
17
18#include "IcuUtilities.h"
19#include "JNIHelp.h"
20#include "JniConstants.h"
21#include "JniException.h"
22#include "ScopedLocalRef.h"
23#include "ScopedPrimitiveArray.h"
24#include "ScopedStringChars.h"
25#include "ScopedUtfChars.h"
26#include "cutils/log.h"
27#include "toStringArray.h"
28#include "unicode/ucnv.h"
29#include "unicode/ucnv_cb.h"
30#include "unicode/uniset.h"
31#include "unicode/ustring.h"
32#include "unicode/utypes.h"
33
34#include <memory>
35#include <vector>
36
37#include <stdlib.h>
38#include <string.h>
39
// Error-handling modes accepted as the onMalformedInput / onUnmappableInput
// arguments of the setCallback* functions; getFromUCallback and getToUCallback
// translate them into ICU callbacks.
#define NativeConverter_REPORT 0
#define NativeConverter_IGNORE 1
#define NativeConverter_REPLACE 2

// Capacity, in elements, of the replacement buffers in the callback contexts.
#define MAX_REPLACEMENT_LENGTH 32 // equivalent to UCNV_ERROR_BUFFER_LENGTH
45
// State handed to ICU as the context of CHARSET_DECODER_CALLBACK. It is
// allocated in NativeConverter_setCallbackDecode and deleted by
// CHARSET_DECODER_CALLBACK when ICU reports UCNV_CLOSE.
struct DecoderCallbackContext {
    // Replacement text written by decoderReplaceCallback.
    UChar replacementChars[MAX_REPLACEMENT_LENGTH];
    // Number of UChars of replacementChars that are in use.
    size_t replacementCharCount;
    // Callback run by CHARSET_DECODER_CALLBACK for UCNV_UNASSIGNED.
    UConverterToUCallback onUnmappableInput;
    // Callback run by CHARSET_DECODER_CALLBACK for UCNV_ILLEGAL and UCNV_IRREGULAR.
    UConverterToUCallback onMalformedInput;
};
52
// State handed to ICU as the context of CHARSET_ENCODER_CALLBACK. It is
// allocated in NativeConverter_setCallbackEncode and deleted by
// CHARSET_ENCODER_CALLBACK when ICU reports UCNV_CLOSE.
struct EncoderCallbackContext {
    // Replacement bytes written by encoderReplaceCallback.
    char replacementBytes[MAX_REPLACEMENT_LENGTH];
    // Number of bytes of replacementBytes that are in use.
    size_t replacementByteCount;
    // Callback run by CHARSET_ENCODER_CALLBACK for UCNV_UNASSIGNED.
    UConverterFromUCallback onUnmappableInput;
    // Callback run by CHARSET_ENCODER_CALLBACK for UCNV_ILLEGAL and UCNV_IRREGULAR.
    UConverterFromUCallback onMalformedInput;
};
59
60static UConverter* toUConverter(jlong address) {
61    return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address));
62}
63
64static bool collectStandardNames(JNIEnv* env, const char* canonicalName, const char* standard,
65                                 std::vector<std::string>& result) {
66  UErrorCode status = U_ZERO_ERROR;
67  icu::UStringEnumeration e(ucnv_openStandardNames(canonicalName, standard, &status));
68  if (maybeThrowIcuException(env, "ucnv_openStandardNames", status)) {
69    return false;
70  }
71
72  int32_t count = e.count(status);
73  if (maybeThrowIcuException(env, "StringEnumeration::count", status)) {
74    return false;
75  }
76
77  for (int32_t i = 0; i < count; ++i) {
78    const icu::UnicodeString* string = e.snext(status);
79    if (maybeThrowIcuException(env, "StringEnumeration::snext", status)) {
80      return false;
81    }
82    std::string s;
83    string->toUTF8String(s);
84    if (s.find_first_of("+,") == std::string::npos) {
85      result.push_back(s);
86    }
87  }
88
89  return true;
90}
91
92static const char* getICUCanonicalName(const char* name) {
93  UErrorCode error = U_ZERO_ERROR;
94  const char* canonicalName = NULL;
95  if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) {
96    return canonicalName;
97  } else if ((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) {
98    return canonicalName;
99  } else if ((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) {
100    return canonicalName;
101  } else if ((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) {
102    // We have some aliases in the form x-blah .. match those first.
103    return canonicalName;
104  } else if (strstr(name, "x-") == name) {
105    // Check if the converter can be opened with the name given.
106    error = U_ZERO_ERROR;
107    icu::LocalUConverterPointer cnv(ucnv_open(name + 2, &error));
108    if (U_SUCCESS(error)) {
109      return name + 2;
110    }
111  }
112  return NULL;
113}
114
115// If a charset listed in the IANA Charset Registry is supported by an implementation
116// of the Java platform then its canonical name must be the name listed in the registry.
117// Many charsets are given more than one name in the registry, in which case the registry
118// identifies one of the names as MIME-preferred. If a charset has more than one registry
119// name then its canonical name must be the MIME-preferred name and the other names in
120// the registry must be valid aliases. If a supported charset is not listed in the IANA
121// registry then its canonical name must begin with one of the strings "X-" or "x-".
122static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) {
123  UErrorCode status = U_ZERO_ERROR;
124
125  // Check to see if this is a well-known MIME or IANA name.
126  const char* cName = NULL;
127  if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) {
128    return env->NewStringUTF(cName);
129  } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) {
130    return env->NewStringUTF(cName);
131  }
132
133  // Check to see if an alias already exists with "x-" prefix, if yes then
134  // make that the canonical name.
135  int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status);
136  for (int i = 0; i < aliasCount; ++i) {
137    const char* name = ucnv_getAlias(icuCanonicalName, i, &status);
138    if (name != NULL && name[0] == 'x' && name[1] == '-') {
139      return env->NewStringUTF(name);
140    }
141  }
142
143  // As a last resort, prepend "x-" to any alias and make that the canonical name.
144  status = U_ZERO_ERROR;
145  const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status);
146  if (name == NULL && strchr(icuCanonicalName, ',') != NULL) {
147    name = ucnv_getAlias(icuCanonicalName, 1, &status);
148  }
149  // If there is no UTR22 canonical name then just return the original name.
150  if (name == NULL) {
151    name = icuCanonicalName;
152  }
153  std::unique_ptr<char[]> result(new char[2 + strlen(name) + 1]);
154  strcpy(&result[0], "x-");
155  strcat(&result[0], name);
156  return env->NewStringUTF(&result[0]);
157}
158
159static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) {
160    ScopedUtfChars converterNameChars(env, converterName);
161    if (converterNameChars.c_str() == NULL) {
162        return 0;
163    }
164    UErrorCode status = U_ZERO_ERROR;
165    UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status);
166    maybeThrowIcuException(env, "ucnv_open", status);
167    return reinterpret_cast<uintptr_t>(cnv);
168}
169
170static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) {
171    ucnv_close(toUConverter(address));
172}
173
174static bool shouldCodecThrow(jboolean flush, UErrorCode error) {
175    if (flush) {
176        return (error != U_BUFFER_OVERFLOW_ERROR && error != U_TRUNCATED_CHAR_FOUND);
177    } else {
178        return (error != U_BUFFER_OVERFLOW_ERROR && error != U_INVALID_CHAR_FOUND && error != U_ILLEGAL_CHAR_FOUND);
179    }
180}
181
182static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address,
183        jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd,
184        jintArray data, jboolean flush) {
185
186    UConverter* cnv = toUConverter(address);
187    if (cnv == NULL) {
188        maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
189        return U_ILLEGAL_ARGUMENT_ERROR;
190    }
191    ScopedCharArrayRO uSource(env, source);
192    if (uSource.get() == NULL) {
193        maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
194        return U_ILLEGAL_ARGUMENT_ERROR;
195    }
196    ScopedByteArrayRW uTarget(env, target);
197    if (uTarget.get() == NULL) {
198        maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
199        return U_ILLEGAL_ARGUMENT_ERROR;
200    }
201    ScopedIntArrayRW myData(env, data);
202    if (myData.get() == NULL) {
203        maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
204        return U_ILLEGAL_ARGUMENT_ERROR;
205    }
206
207    // Do the conversion.
208    jint* sourceOffset = &myData[0];
209    jint* targetOffset = &myData[1];
210    const jchar* mySource = uSource.get() + *sourceOffset;
211    const UChar* mySourceLimit= uSource.get() + sourceEnd;
212    char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
213    const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd);
214    UErrorCode errorCode = U_ZERO_ERROR;
215    ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode);
216    *sourceOffset = (mySource - uSource.get()) - *sourceOffset;
217    *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get());
218
219    // If there was an error, count the problematic characters.
220    if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
221        errorCode == U_TRUNCATED_CHAR_FOUND) {
222        int8_t invalidUCharCount = 32;
223        UChar invalidUChars[32];
224        UErrorCode minorErrorCode = U_ZERO_ERROR;
225        ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode);
226        if (U_SUCCESS(minorErrorCode)) {
227            myData[2] = invalidUCharCount;
228        }
229    }
230
231    // Managed code handles some cases; throw all other errors.
232    if (shouldCodecThrow(flush, errorCode)) {
233        maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode);
234    }
235    return errorCode;
236}
237
238static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address,
239        jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd,
240        jintArray data, jboolean flush) {
241
242    UConverter* cnv = toUConverter(address);
243    if (cnv == NULL) {
244        maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
245        return U_ILLEGAL_ARGUMENT_ERROR;
246    }
247    ScopedByteArrayRO uSource(env, source);
248    if (uSource.get() == NULL) {
249        maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
250        return U_ILLEGAL_ARGUMENT_ERROR;
251    }
252    ScopedCharArrayRW uTarget(env, target);
253    if (uTarget.get() == NULL) {
254        maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
255        return U_ILLEGAL_ARGUMENT_ERROR;
256    }
257    ScopedIntArrayRW myData(env, data);
258    if (myData.get() == NULL) {
259        maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
260        return U_ILLEGAL_ARGUMENT_ERROR;
261    }
262
263    // Do the conversion.
264    jint* sourceOffset = &myData[0];
265    jint* targetOffset = &myData[1];
266    const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset);
267    const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd);
268    UChar* cTarget = uTarget.get() + *targetOffset;
269    const UChar* cTargetLimit = uTarget.get() + targetEnd;
270    UErrorCode errorCode = U_ZERO_ERROR;
271    ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode);
272    *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset;
273    *targetOffset = cTarget - uTarget.get() - *targetOffset;
274
275    // If there was an error, count the problematic bytes.
276    if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
277        errorCode == U_TRUNCATED_CHAR_FOUND) {
278        int8_t invalidByteCount = 32;
279        char invalidBytes[32] = {'\0'};
280        UErrorCode minorErrorCode = U_ZERO_ERROR;
281        ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode);
282        if (U_SUCCESS(minorErrorCode)) {
283            myData[2] = invalidByteCount;
284        }
285    }
286
287    // Managed code handles some cases; throw all other errors.
288    if (shouldCodecThrow(flush, errorCode)) {
289        maybeThrowIcuException(env, "ucnv_toUnicode", errorCode);
290    }
291    return errorCode;
292}
293
294static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) {
295    UConverter* cnv = toUConverter(address);
296    if (cnv) {
297        ucnv_resetToUnicode(cnv);
298    }
299}
300
301static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) {
302    UConverter* cnv = toUConverter(address);
303    if (cnv) {
304        ucnv_resetFromUnicode(cnv);
305    }
306}
307
308static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) {
309    UConverter* cnv = toUConverter(address);
310    return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1;
311}
312
313static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) {
314    UConverter* cnv = toUConverter(address);
315    return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1;
316}
317
318static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) {
319    UConverter* cnv = toUConverter(address);
320    return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1;
321}
322
323static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) {
324    int32_t num = ucnv_countAvailable();
325    jobjectArray result = env->NewObjectArray(num, JniConstants::stringClass, NULL);
326    if (result == NULL) {
327        return NULL;
328    }
329    for (int i = 0; i < num; ++i) {
330        const char* name = ucnv_getAvailableName(i);
331        ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name));
332        if (javaCanonicalName.get() == NULL) {
333            return NULL;
334        }
335        env->SetObjectArrayElement(result, i, javaCanonicalName.get());
336        if (env->ExceptionCheck()) {
337            return NULL;
338        }
339    }
340    return result;
341}
342
343static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args,
344        const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason,
345        UErrorCode* status) {
346    if (!rawContext) {
347        return;
348    }
349    const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
350    switch(reason) {
351    case UCNV_UNASSIGNED:
352        ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status);
353        return;
354    case UCNV_ILLEGAL:
355    case UCNV_IRREGULAR:
356        ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status);
357        return;
358    case UCNV_CLOSE:
359        delete ctx;
360        return;
361    default:
362        *status = U_ILLEGAL_ARGUMENT_ERROR;
363        return;
364    }
365}
366
367static void encoderReplaceCallback(const void* rawContext,
368        UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32,
369        UConverterCallbackReason, UErrorCode * err) {
370    if (rawContext == NULL) {
371        return;
372    }
373    const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
374    *err = U_ZERO_ERROR;
375    ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err);
376}
377
378static UConverterFromUCallback getFromUCallback(int32_t mode) {
379    switch(mode) {
380    case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP;
381    case NativeConverter_REPLACE: return encoderReplaceCallback;
382    case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP;
383    }
384    abort();
385}
386
387static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address,
388        jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) {
389    UConverter* cnv = toUConverter(address);
390    if (cnv == NULL) {
391        maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
392        return;
393    }
394
395    UConverterFromUCallback oldCallback = NULL;
396    const void* oldCallbackContext = NULL;
397    ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext));
398
399    EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>(
400            reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext));
401    if (callbackContext == NULL) {
402        callbackContext = new EncoderCallbackContext;
403    }
404
405    callbackContext->onMalformedInput = getFromUCallback(onMalformedInput);
406    callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput);
407
408    ScopedByteArrayRO replacementBytes(env, javaReplacement);
409    if (replacementBytes.get() == NULL) {
410        maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR);
411        return;
412    }
413    memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size());
414    callbackContext->replacementByteCount = replacementBytes.size();
415
416    UErrorCode errorCode = U_ZERO_ERROR;
417    ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
418    maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode);
419}
420
421static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) {
422    // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is
423    // never true for us.
424    *err = U_ZERO_ERROR;
425}
426
427static void decoderReplaceCallback(const void* rawContext,
428        UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason,
429        UErrorCode* err) {
430    if (!rawContext) {
431        return;
432    }
433    const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
434    *err = U_ZERO_ERROR;
435    ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err);
436}
437
438static UConverterToUCallback getToUCallback(int32_t mode) {
439    switch (mode) {
440    case NativeConverter_IGNORE: return decoderIgnoreCallback;
441    case NativeConverter_REPLACE: return decoderReplaceCallback;
442    case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP;
443    }
444    abort();
445}
446
447static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args,
448        const char* codeUnits, int32_t length,
449        UConverterCallbackReason reason, UErrorCode* status) {
450    if (!rawContext) {
451        return;
452    }
453    const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
454    switch(reason) {
455    case UCNV_UNASSIGNED:
456        ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status);
457        return;
458    case UCNV_ILLEGAL:
459    case UCNV_IRREGULAR:
460        ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status);
461        return;
462    case UCNV_CLOSE:
463        delete ctx;
464        return;
465    default:
466        *status = U_ILLEGAL_ARGUMENT_ERROR;
467        return;
468    }
469}
470
471static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address,
472        jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) {
473    UConverter* cnv = toUConverter(address);
474    if (cnv == NULL) {
475        maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR);
476        return;
477    }
478
479    UConverterToUCallback oldCallback;
480    const void* oldCallbackContext;
481    ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext);
482
483    DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>(
484            reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext));
485    if (callbackContext == NULL) {
486        callbackContext = new DecoderCallbackContext;
487    }
488
489    callbackContext->onMalformedInput = getToUCallback(onMalformedInput);
490    callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput);
491
492    ScopedStringChars replacement(env, javaReplacement);
493    if (replacement.get() == NULL) {
494        maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR);
495        return;
496    }
497    u_strncpy(callbackContext->replacementChars, replacement.get(), replacement.size());
498    callbackContext->replacementCharCount = replacement.size();
499
500    UErrorCode errorCode = U_ZERO_ERROR;
501    ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
502    maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode);
503}
504
505static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) {
506    return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle));
507}
508
509static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) {
510    UConverter* cnv = toUConverter(address);
511    if (cnv == NULL) {
512        return NULL;
513    }
514    UErrorCode status = U_ZERO_ERROR;
515    char replacementBytes[MAX_REPLACEMENT_LENGTH];
516    int8_t len = sizeof(replacementBytes);
517    ucnv_getSubstChars(cnv, replacementBytes, &len, &status);
518    if (!U_SUCCESS(status)) {
519        return env->NewByteArray(0);
520    }
521    jbyteArray result = env->NewByteArray(len);
522    if (result == NULL) {
523        return NULL;
524    }
525    env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes));
526    return result;
527}
528
529static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) {
530    ScopedUtfChars name1Chars(env, name1);
531    if (name1Chars.c_str() == NULL) {
532        return JNI_FALSE;
533    }
534    ScopedUtfChars name2Chars(env, name2);
535    if (name2Chars.c_str() == NULL) {
536        return JNI_FALSE;
537    }
538
539    UErrorCode errorCode = U_ZERO_ERROR;
540    icu::LocalUConverterPointer converter1(ucnv_open(name1Chars.c_str(), &errorCode));
541    icu::UnicodeSet set1;
542    ucnv_getUnicodeSet(&*converter1, set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
543
544    icu::LocalUConverterPointer converter2(ucnv_open(name2Chars.c_str(), &errorCode));
545    icu::UnicodeSet set2;
546    ucnv_getUnicodeSet(&*converter2, set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
547
548    return U_SUCCESS(errorCode) && set1.containsAll(set2);
549}
550
551static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) {
552    ScopedUtfChars charsetNameChars(env, charsetName);
553    if (charsetNameChars.c_str() == NULL) {
554        return NULL;
555    }
556
557    // Get ICU's canonical name for this charset.
558    const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str());
559    if (icuCanonicalName == NULL) {
560        return NULL;
561    }
562
563    // Get Java's canonical name for this charset.
564    jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName);
565    if (env->ExceptionCheck()) {
566        return NULL;
567    }
568
569    // Check that this charset is supported.
570    {
571        // ICU doesn't offer any "isSupported", so we just open and immediately close.
572        UErrorCode error = U_ZERO_ERROR;
573        icu::LocalUConverterPointer cnv(ucnv_open(icuCanonicalName, &error));
574        if (!U_SUCCESS(error)) {
575            return NULL;
576        }
577    }
578
579    // Get the aliases for this charset.
580    std::vector<std::string> aliases;
581    if (!collectStandardNames(env, icuCanonicalName, "IANA", aliases)) {
582        return NULL;
583    }
584    if (!collectStandardNames(env, icuCanonicalName, "MIME", aliases)) {
585        return NULL;
586    }
587    if (!collectStandardNames(env, icuCanonicalName, "JAVA", aliases)) {
588        return NULL;
589    }
590    if (!collectStandardNames(env, icuCanonicalName, "WINDOWS", aliases)) {
591        return NULL;
592    }
593    jobjectArray javaAliases = toStringArray(env, aliases);
594    if (env->ExceptionCheck()) {
595        return NULL;
596    }
597
598    // Construct the CharsetICU object.
599    static jmethodID charsetConstructor = env->GetMethodID(JniConstants::charsetICUClass, "<init>",
600            "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V");
601    if (env->ExceptionCheck()) {
602        return NULL;
603    }
604
605    jstring icuCanonicalNameStr = env->NewStringUTF(icuCanonicalName);
606    if (env->ExceptionCheck()) {
607        return NULL;
608    }
609
610    return env->NewObject(JniConstants::charsetICUClass, charsetConstructor,
611            javaCanonicalName, icuCanonicalNameStr, javaAliases);
612}
613
614static void FreeNativeConverter(void *converter) {
615    ucnv_close(reinterpret_cast<UConverter*>(converter));
616}
617
618static jlong NativeConverter_getNativeFinalizer(JNIEnv*, jclass) {
619    return reinterpret_cast<jlong>(&FreeNativeConverter);
620}
621
622
623static jlong NativeConverter_getNativeSize(JNIEnv*, jclass, jstring) {
624    // TODO: Improve estimate.
625    return 200;
626}
627
// JNI registration table: each entry pairs a Java method name and JNI type
// signature with the NativeConverter_<name> function defined in this file.
static JNINativeMethod gMethods[] = {
    NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"),
    NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"),
    NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"),
    NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"),
    NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"),
    NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"),
    NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"),
    NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"),
    NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"),
    NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"),
    NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"),
    NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"),
    NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"),
    NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"),
    NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)V"),
    NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)V"),
    NATIVE_METHOD(NativeConverter, getNativeFinalizer, "()J"),
    NATIVE_METHOD(NativeConverter, getNativeSize, "()J")
};
648void register_libcore_icu_NativeConverter(JNIEnv* env) {
649    jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods));
650}
651