1/** 2******************************************************************************* 3* Copyright (C) 1996-2006, International Business Machines Corporation and * 4* others. All Rights Reserved. * 5******************************************************************************* 6* 7* 8******************************************************************************* 9*/ 10/* 11 * (C) Copyright IBM Corp. 2000 - All Rights Reserved 12 * A JNI wrapper to ICU native converter Interface 13 * @author: Ram Viswanadha 14 */ 15 16#define LOG_TAG "NativeConverter" 17 18#include "IcuUtilities.h" 19#include "JNIHelp.h" 20#include "JniConstants.h" 21#include "JniException.h" 22#include "ScopedLocalRef.h" 23#include "ScopedPrimitiveArray.h" 24#include "ScopedStringChars.h" 25#include "ScopedUtfChars.h" 26#include "UniquePtr.h" 27#include "cutils/log.h" 28#include "toStringArray.h" 29#include "unicode/ucnv.h" 30#include "unicode/ucnv_cb.h" 31#include "unicode/uniset.h" 32#include "unicode/ustring.h" 33#include "unicode/utypes.h" 34 35#include <vector> 36 37#include <stdlib.h> 38#include <string.h> 39 40#define NativeConverter_REPORT 0 41#define NativeConverter_IGNORE 1 42#define NativeConverter_REPLACE 2 43 44#define MAX_REPLACEMENT_LENGTH 32 // equivalent to UCNV_ERROR_BUFFER_LENGTH 45 46struct DecoderCallbackContext { 47 UChar replacementChars[MAX_REPLACEMENT_LENGTH]; 48 size_t replacementCharCount; 49 UConverterToUCallback onUnmappableInput; 50 UConverterToUCallback onMalformedInput; 51}; 52 53struct EncoderCallbackContext { 54 char replacementBytes[MAX_REPLACEMENT_LENGTH]; 55 size_t replacementByteCount; 56 UConverterFromUCallback onUnmappableInput; 57 UConverterFromUCallback onMalformedInput; 58}; 59 60struct UConverterDeleter { 61 void operator()(UConverter* p) const { 62 ucnv_close(p); 63 } 64}; 65typedef UniquePtr<UConverter, UConverterDeleter> UniqueUConverter; 66 67static UConverter* toUConverter(jlong address) { 68 return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address)); 69} 70 71static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) { 72 ScopedUtfChars converterNameChars(env, converterName); 73 if (converterNameChars.c_str() == NULL) { 74 return 0; 75 } 76 UErrorCode status = U_ZERO_ERROR; 77 UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status); 78 maybeThrowIcuException(env, "ucnv_open", status); 79 return reinterpret_cast<uintptr_t>(cnv); 80} 81 82static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) { 83 ucnv_close(toUConverter(address)); 84} 85 86static bool shouldCodecThrow(jboolean flush, UErrorCode error) { 87 if (flush) { 88 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_TRUNCATED_CHAR_FOUND); 89 } else { 90 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_INVALID_CHAR_FOUND && error != U_ILLEGAL_CHAR_FOUND); 91 } 92} 93 94static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address, 95 jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, 96 jintArray data, jboolean flush) { 97 98 UConverter* cnv = toUConverter(address); 99 if (cnv == NULL) { 100 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 101 return U_ILLEGAL_ARGUMENT_ERROR; 102 } 103 ScopedCharArrayRO uSource(env, source); 104 if (uSource.get() == NULL) { 105 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR); 106 return U_ILLEGAL_ARGUMENT_ERROR; 107 } 108 ScopedByteArrayRW uTarget(env, target); 109 if (uTarget.get() == NULL) { 110 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR); 111 return U_ILLEGAL_ARGUMENT_ERROR; 112 } 113 ScopedIntArrayRW myData(env, data); 114 if (myData.get() == NULL) { 115 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR); 116 return U_ILLEGAL_ARGUMENT_ERROR; 117 } 118 119 // Do the conversion. 120 jint* sourceOffset = &myData[0]; 121 jint* targetOffset = &myData[1]; 122 const jchar* mySource = uSource.get() + *sourceOffset; 123 const UChar* mySourceLimit= uSource.get() + sourceEnd; 124 char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset); 125 const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd); 126 UErrorCode errorCode = U_ZERO_ERROR; 127 ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode); 128 *sourceOffset = (mySource - uSource.get()) - *sourceOffset; 129 *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get()) - *targetOffset; 130 131 // If there was an error, count the problematic characters. 132 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) { 133 int8_t invalidUCharCount = 32; 134 UChar invalidUChars[32]; 135 UErrorCode minorErrorCode = U_ZERO_ERROR; 136 ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode); 137 if (U_SUCCESS(minorErrorCode)) { 138 myData[2] = invalidUCharCount; 139 } 140 } 141 142 // Managed code handles some cases; throw all other errors. 143 if (shouldCodecThrow(flush, errorCode)) { 144 maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode); 145 } 146 return errorCode; 147} 148 149static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address, 150 jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, 151 jintArray data, jboolean flush) { 152 153 UConverter* cnv = toUConverter(address); 154 if (cnv == NULL) { 155 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 156 return U_ILLEGAL_ARGUMENT_ERROR; 157 } 158 ScopedByteArrayRO uSource(env, source); 159 if (uSource.get() == NULL) { 160 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR); 161 return U_ILLEGAL_ARGUMENT_ERROR; 162 } 163 ScopedCharArrayRW uTarget(env, target); 164 if (uTarget.get() == NULL) { 165 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR); 166 return U_ILLEGAL_ARGUMENT_ERROR; 167 } 168 ScopedIntArrayRW myData(env, data); 169 if (myData.get() == NULL) { 170 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR); 171 return U_ILLEGAL_ARGUMENT_ERROR; 172 } 173 174 // Do the conversion. 175 jint* sourceOffset = &myData[0]; 176 jint* targetOffset = &myData[1]; 177 const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset); 178 const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd); 179 UChar* cTarget = uTarget.get() + *targetOffset; 180 const UChar* cTargetLimit = uTarget.get() + targetEnd; 181 UErrorCode errorCode = U_ZERO_ERROR; 182 ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode); 183 *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset; 184 *targetOffset = cTarget - uTarget.get() - *targetOffset; 185 186 // If there was an error, count the problematic bytes. 187 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) { 188 int8_t invalidByteCount = 32; 189 char invalidBytes[32] = {'\0'}; 190 UErrorCode minorErrorCode = U_ZERO_ERROR; 191 ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode); 192 if (U_SUCCESS(minorErrorCode)) { 193 myData[2] = invalidByteCount; 194 } 195 } 196 197 // Managed code handles some cases; throw all other errors. 198 if (shouldCodecThrow(flush, errorCode)) { 199 maybeThrowIcuException(env, "ucnv_toUnicode", errorCode); 200 } 201 return errorCode; 202} 203 204static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) { 205 UConverter* cnv = toUConverter(address); 206 if (cnv) { 207 ucnv_resetToUnicode(cnv); 208 } 209} 210 211static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) { 212 UConverter* cnv = toUConverter(address); 213 if (cnv) { 214 ucnv_resetFromUnicode(cnv); 215 } 216} 217 218static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) { 219 UConverter* cnv = toUConverter(address); 220 return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1; 221} 222 223static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) { 224 UConverter* cnv = toUConverter(address); 225 return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1; 226} 227 228static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) { 229 UConverter* cnv = toUConverter(address); 230 return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1; 231} 232 233static jboolean NativeConverter_canEncode(JNIEnv*, jclass, jlong address, jint codeUnit) { 234 UErrorCode errorCode = U_ZERO_ERROR; 235 UConverter* cnv = toUConverter(address); 236 if (cnv == NULL) { 237 return JNI_FALSE; 238 } 239 240 UChar srcBuffer[3]; 241 const UChar* src = &srcBuffer[0]; 242 const UChar* srcLimit = (codeUnit < 0x10000) ? &src[1] : &src[2]; 243 244 char dstBuffer[5]; 245 char* dst = &dstBuffer[0]; 246 const char* dstLimit = &dstBuffer[4]; 247 248 int i = 0; 249 UTF_APPEND_CHAR(&srcBuffer[0], i, 2, codeUnit); 250 251 ucnv_fromUnicode(cnv, &dst, dstLimit, &src, srcLimit, NULL, TRUE, &errorCode); 252 return U_SUCCESS(errorCode); 253} 254 255/* 256 * If a charset listed in the IANA Charset Registry is supported by an implementation 257 * of the Java platform then its canonical name must be the name listed in the registry. 258 * Many charsets are given more than one name in the registry, in which case the registry 259 * identifies one of the names as MIME-preferred. If a charset has more than one registry 260 * name then its canonical name must be the MIME-preferred name and the other names in 261 * the registry must be valid aliases. If a supported charset is not listed in the IANA 262 * registry then its canonical name must begin with one of the strings "X-" or "x-". 263 */ 264static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) { 265 UErrorCode status = U_ZERO_ERROR; 266 267 // Check to see if this is a well-known MIME or IANA name. 268 const char* cName = NULL; 269 if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) { 270 return env->NewStringUTF(cName); 271 } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) { 272 return env->NewStringUTF(cName); 273 } 274 275 // Check to see if an alias already exists with "x-" prefix, if yes then 276 // make that the canonical name. 277 int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status); 278 for (int i = 0; i < aliasCount; ++i) { 279 const char* name = ucnv_getAlias(icuCanonicalName, i, &status); 280 if (name != NULL && name[0] == 'x' && name[1] == '-') { 281 return env->NewStringUTF(name); 282 } 283 } 284 285 // As a last resort, prepend "x-" to any alias and make that the canonical name. 286 status = U_ZERO_ERROR; 287 const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status); 288 if (name == NULL && strchr(icuCanonicalName, ',') != NULL) { 289 name = ucnv_getAlias(icuCanonicalName, 1, &status); 290 } 291 // If there is no UTR22 canonical name then just return the original name. 292 if (name == NULL) { 293 name = icuCanonicalName; 294 } 295 UniquePtr<char[]> result(new char[2 + strlen(name) + 1]); 296 strcpy(&result[0], "x-"); 297 strcat(&result[0], name); 298 return env->NewStringUTF(&result[0]); 299} 300 301static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) { 302 int32_t num = ucnv_countAvailable(); 303 jobjectArray result = env->NewObjectArray(num, JniConstants::stringClass, NULL); 304 if (result == NULL) { 305 return NULL; 306 } 307 for (int i = 0; i < num; ++i) { 308 const char* name = ucnv_getAvailableName(i); 309 ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name)); 310 if (javaCanonicalName.get() == NULL) { 311 return NULL; 312 } 313 env->SetObjectArrayElement(result, i, javaCanonicalName.get()); 314 if (env->ExceptionCheck()) { 315 return NULL; 316 } 317 } 318 return result; 319} 320 321static jobjectArray getAliases(JNIEnv* env, const char* icuCanonicalName) { 322 // Get an upper bound on the number of aliases... 323 const char* myEncName = icuCanonicalName; 324 UErrorCode error = U_ZERO_ERROR; 325 size_t aliasCount = ucnv_countAliases(myEncName, &error); 326 if (aliasCount == 0 && myEncName[0] == 'x' && myEncName[1] == '-') { 327 myEncName = myEncName + 2; 328 aliasCount = ucnv_countAliases(myEncName, &error); 329 } 330 if (!U_SUCCESS(error)) { 331 return NULL; 332 } 333 334 // Collect the aliases we want... 335 std::vector<std::string> aliases; 336 for (size_t i = 0; i < aliasCount; ++i) { 337 const char* name = ucnv_getAlias(myEncName, i, &error); 338 if (!U_SUCCESS(error)) { 339 return NULL; 340 } 341 // TODO: why do we ignore these ones? 342 if (strchr(name, '+') == 0 && strchr(name, ',') == 0) { 343 aliases.push_back(name); 344 } 345 } 346 return toStringArray(env, aliases); 347} 348 349static const char* getICUCanonicalName(const char* name) { 350 UErrorCode error = U_ZERO_ERROR; 351 const char* canonicalName = NULL; 352 if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) { 353 return canonicalName; 354 } else if((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) { 355 return canonicalName; 356 } else if((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) { 357 return canonicalName; 358 } else if((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) { 359 /* we have some aliases in the form x-blah .. match those first */ 360 return canonicalName; 361 } else if (strstr(name, "x-") == name) { 362 /* check if the converter can be opened with the name given */ 363 error = U_ZERO_ERROR; 364 UniqueUConverter cnv(ucnv_open(name + 2, &error)); 365 if (cnv.get() != NULL) { 366 return name + 2; 367 } 368 } 369 return NULL; 370} 371 372static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args, 373 const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, 374 UErrorCode* status) { 375 if (!rawContext) { 376 return; 377 } 378 const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 379 switch(reason) { 380 case UCNV_UNASSIGNED: 381 ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status); 382 return; 383 case UCNV_ILLEGAL: 384 case UCNV_IRREGULAR: 385 ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status); 386 return; 387 case UCNV_CLOSE: 388 delete ctx; 389 return; 390 default: 391 *status = U_ILLEGAL_ARGUMENT_ERROR; 392 return; 393 } 394} 395 396static void encoderReplaceCallback(const void* rawContext, 397 UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32, 398 UConverterCallbackReason, UErrorCode * err) { 399 if (rawContext == NULL) { 400 return; 401 } 402 const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 403 *err = U_ZERO_ERROR; 404 ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err); 405} 406 407static UConverterFromUCallback getFromUCallback(int32_t mode) { 408 switch(mode) { 409 case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP; 410 case NativeConverter_REPLACE: return encoderReplaceCallback; 411 case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP; 412 } 413 abort(); 414} 415 416static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address, 417 jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) { 418 UConverter* cnv = toUConverter(address); 419 if (cnv == NULL) { 420 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 421 return; 422 } 423 424 UConverterFromUCallback oldCallback = NULL; 425 const void* oldCallbackContext = NULL; 426 ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext)); 427 428 EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>( 429 reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext)); 430 if (callbackContext == NULL) { 431 callbackContext = new EncoderCallbackContext; 432 } 433 434 callbackContext->onMalformedInput = getFromUCallback(onMalformedInput); 435 callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput); 436 437 ScopedByteArrayRO replacementBytes(env, javaReplacement); 438 if (replacementBytes.get() == NULL) { 439 maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR); 440 return; 441 } 442 memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size()); 443 callbackContext->replacementByteCount = replacementBytes.size(); 444 445 UErrorCode errorCode = U_ZERO_ERROR; 446 ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode); 447 maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode); 448} 449 450static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) { 451 // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is 452 // never true for us. 453 *err = U_ZERO_ERROR; 454} 455 456static void decoderReplaceCallback(const void* rawContext, 457 UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason, 458 UErrorCode* err) { 459 if (!rawContext) { 460 return; 461 } 462 const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 463 *err = U_ZERO_ERROR; 464 ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err); 465} 466 467static UConverterToUCallback getToUCallback(int32_t mode) { 468 switch (mode) { 469 case NativeConverter_IGNORE: return decoderIgnoreCallback; 470 case NativeConverter_REPLACE: return decoderReplaceCallback; 471 case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP; 472 } 473 abort(); 474} 475 476static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args, 477 const char* codeUnits, int32_t length, 478 UConverterCallbackReason reason, UErrorCode* status) { 479 if (!rawContext) { 480 return; 481 } 482 const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 483 switch(reason) { 484 case UCNV_UNASSIGNED: 485 ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status); 486 return; 487 case UCNV_ILLEGAL: 488 case UCNV_IRREGULAR: 489 ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status); 490 return; 491 case UCNV_CLOSE: 492 delete ctx; 493 return; 494 default: 495 *status = U_ILLEGAL_ARGUMENT_ERROR; 496 return; 497 } 498} 499 500static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address, 501 jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) { 502 UConverter* cnv = toUConverter(address); 503 if (cnv == NULL) { 504 maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR); 505 return; 506 } 507 508 UConverterToUCallback oldCallback; 509 const void* oldCallbackContext; 510 ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext); 511 512 DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>( 513 reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext)); 514 if (callbackContext == NULL) { 515 callbackContext = new DecoderCallbackContext; 516 } 517 518 callbackContext->onMalformedInput = getToUCallback(onMalformedInput); 519 callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput); 520 521 ScopedStringChars replacement(env, javaReplacement); 522 if (replacement.get() == NULL) { 523 maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR); 524 return; 525 } 526 u_strncpy(callbackContext->replacementChars, replacement.get(), replacement.size()); 527 callbackContext->replacementCharCount = replacement.size(); 528 529 UErrorCode errorCode = U_ZERO_ERROR; 530 ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode); 531 maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode); 532} 533 534static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) { 535 return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle)); 536} 537 538static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) { 539 UConverter* cnv = toUConverter(address); 540 if (cnv == NULL) { 541 return NULL; 542 } 543 UErrorCode status = U_ZERO_ERROR; 544 char replacementBytes[MAX_REPLACEMENT_LENGTH]; 545 int8_t len = sizeof(replacementBytes); 546 ucnv_getSubstChars(cnv, replacementBytes, &len, &status); 547 if (!U_SUCCESS(status)) { 548 return env->NewByteArray(0); 549 } 550 jbyteArray result = env->NewByteArray(len); 551 if (result == NULL) { 552 return NULL; 553 } 554 env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes)); 555 return result; 556} 557 558static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) { 559 ScopedUtfChars name1Chars(env, name1); 560 if (name1Chars.c_str() == NULL) { 561 return JNI_FALSE; 562 } 563 ScopedUtfChars name2Chars(env, name2); 564 if (name2Chars.c_str() == NULL) { 565 return JNI_FALSE; 566 } 567 568 UErrorCode errorCode = U_ZERO_ERROR; 569 UniqueUConverter converter1(ucnv_open(name1Chars.c_str(), &errorCode)); 570 UnicodeSet set1; 571 ucnv_getUnicodeSet(converter1.get(), set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 572 573 UniqueUConverter converter2(ucnv_open(name2Chars.c_str(), &errorCode)); 574 UnicodeSet set2; 575 ucnv_getUnicodeSet(converter2.get(), set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 576 577 return U_SUCCESS(errorCode) && set1.containsAll(set2); 578} 579 580static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) { 581 ScopedUtfChars charsetNameChars(env, charsetName); 582 if (charsetNameChars.c_str() == NULL) { 583 return NULL; 584 } 585 // Get ICU's canonical name for this charset. 586 const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str()); 587 if (icuCanonicalName == NULL) { 588 return NULL; 589 } 590 // Get Java's canonical name for this charset. 591 jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName); 592 if (env->ExceptionCheck()) { 593 return NULL; 594 } 595 596 // Check that this charset is supported. 597 // ICU doesn't offer any "isSupported", so we just open and immediately close. 598 // We ignore the UErrorCode because ucnv_open returning NULL is all the information we need. 599 UErrorCode dummy = U_ZERO_ERROR; 600 UniqueUConverter cnv(ucnv_open(icuCanonicalName, &dummy)); 601 if (cnv.get() == NULL) { 602 return NULL; 603 } 604 cnv.reset(); 605 606 // Get the aliases for this charset. 607 jobjectArray aliases = getAliases(env, icuCanonicalName); 608 if (env->ExceptionCheck()) { 609 return NULL; 610 } 611 612 // Construct the CharsetICU object. 613 static jmethodID charsetConstructor = env->GetMethodID(JniConstants::charsetICUClass, "<init>", 614 "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V"); 615 if (env->ExceptionCheck()) { 616 return NULL; 617 } 618 return env->NewObject(JniConstants::charsetICUClass, charsetConstructor, 619 javaCanonicalName, env->NewStringUTF(icuCanonicalName), aliases); 620} 621 622static JNINativeMethod gMethods[] = { 623 NATIVE_METHOD(NativeConverter, canEncode, "(JI)Z"), 624 NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"), 625 NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"), 626 NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"), 627 NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"), 628 NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"), 629 NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"), 630 NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"), 631 NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"), 632 NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"), 633 NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"), 634 NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"), 635 NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"), 636 NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"), 637 NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"), 638 NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)V"), 639 NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)V"), 640}; 641void register_libcore_icu_NativeConverter(JNIEnv* env) { 642 jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods)); 643} 644