1/** 2******************************************************************************* 3* Copyright (C) 1996-2006, International Business Machines Corporation and * 4* others. All Rights Reserved. * 5******************************************************************************* 6* 7* 8******************************************************************************* 9*/ 10/* 11 * (C) Copyright IBM Corp. 2000 - All Rights Reserved 12 * A JNI wrapper to ICU native converter Interface 13 * @author: Ram Viswanadha 14 */ 15 16#define LOG_TAG "NativeConverter" 17 18#include "JNIHelp.h" 19#include "JniConstants.h" 20#include "JniException.h" 21#include "ScopedLocalRef.h" 22#include "ScopedPrimitiveArray.h" 23#include "ScopedStringChars.h" 24#include "ScopedUtfChars.h" 25#include "UniquePtr.h" 26#include "cutils/log.h" 27#include "toStringArray.h" 28#include "unicode/ucnv.h" 29#include "unicode/ucnv_cb.h" 30#include "unicode/uniset.h" 31#include "unicode/ustring.h" 32#include "unicode/utypes.h" 33 34#include <vector> 35 36#include <stdlib.h> 37#include <string.h> 38 39#define NativeConverter_REPORT 0 40#define NativeConverter_IGNORE 1 41#define NativeConverter_REPLACE 2 42 43#define MAX_REPLACEMENT_LENGTH 32 // equivalent to UCNV_ERROR_BUFFER_LENGTH 44 45struct DecoderCallbackContext { 46 UChar replacementChars[MAX_REPLACEMENT_LENGTH]; 47 size_t replacementCharCount; 48 UConverterToUCallback onUnmappableInput; 49 UConverterToUCallback onMalformedInput; 50}; 51 52struct EncoderCallbackContext { 53 char replacementBytes[MAX_REPLACEMENT_LENGTH]; 54 size_t replacementByteCount; 55 UConverterFromUCallback onUnmappableInput; 56 UConverterFromUCallback onMalformedInput; 57}; 58 59struct UConverterDeleter { 60 void operator()(UConverter* p) const { 61 ucnv_close(p); 62 } 63}; 64typedef UniquePtr<UConverter, UConverterDeleter> UniqueUConverter; 65 66static UConverter* toUConverter(jlong address) { 67 return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address)); 68} 69 70static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) { 71 ScopedUtfChars converterNameChars(env, converterName); 72 if (converterNameChars.c_str() == NULL) { 73 return 0; 74 } 75 UErrorCode status = U_ZERO_ERROR; 76 UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status); 77 maybeThrowIcuException(env, "ucnv_open", status); 78 return reinterpret_cast<uintptr_t>(cnv); 79} 80 81static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) { 82 ucnv_close(toUConverter(address)); 83} 84 85static bool shouldCodecThrow(jboolean flush, UErrorCode error) { 86 if (flush) { 87 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_TRUNCATED_CHAR_FOUND); 88 } else { 89 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_INVALID_CHAR_FOUND && error != U_ILLEGAL_CHAR_FOUND); 90 } 91} 92 93static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address, 94 jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, 95 jintArray data, jboolean flush) { 96 97 UConverter* cnv = toUConverter(address); 98 if (cnv == NULL) { 99 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 100 return U_ILLEGAL_ARGUMENT_ERROR; 101 } 102 ScopedCharArrayRO uSource(env, source); 103 if (uSource.get() == NULL) { 104 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR); 105 return U_ILLEGAL_ARGUMENT_ERROR; 106 } 107 ScopedByteArrayRW uTarget(env, target); 108 if (uTarget.get() == NULL) { 109 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR); 110 return U_ILLEGAL_ARGUMENT_ERROR; 111 } 112 ScopedIntArrayRW myData(env, data); 113 if (myData.get() == NULL) { 114 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR); 115 return U_ILLEGAL_ARGUMENT_ERROR; 116 } 117 118 // Do the conversion. 119 jint* sourceOffset = &myData[0]; 120 jint* targetOffset = &myData[1]; 121 const jchar* mySource = uSource.get() + *sourceOffset; 122 const UChar* mySourceLimit= uSource.get() + sourceEnd; 123 char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset); 124 const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd); 125 UErrorCode errorCode = U_ZERO_ERROR; 126 ucnv_fromUnicode(cnv , &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, (UBool) flush, &errorCode); 127 *sourceOffset = (mySource - uSource.get()) - *sourceOffset; 128 *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get()) - *targetOffset; 129 130 // If there was an error, count the problematic characters. 131 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) { 132 int8_t invalidUCharCount = 32; 133 UChar invalidUChars[32]; 134 UErrorCode minorErrorCode = U_ZERO_ERROR; 135 ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode); 136 if (U_SUCCESS(minorErrorCode)) { 137 myData[2] = invalidUCharCount; 138 } 139 } 140 141 // Managed code handles some cases; throw all other errors. 142 if (shouldCodecThrow(flush, errorCode)) { 143 maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode); 144 } 145 return errorCode; 146} 147 148static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address, 149 jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, 150 jintArray data, jboolean flush) { 151 152 UConverter* cnv = toUConverter(address); 153 if (cnv == NULL) { 154 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 155 return U_ILLEGAL_ARGUMENT_ERROR; 156 } 157 ScopedByteArrayRO uSource(env, source); 158 if (uSource.get() == NULL) { 159 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR); 160 return U_ILLEGAL_ARGUMENT_ERROR; 161 } 162 ScopedCharArrayRW uTarget(env, target); 163 if (uTarget.get() == NULL) { 164 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR); 165 return U_ILLEGAL_ARGUMENT_ERROR; 166 } 167 ScopedIntArrayRW myData(env, data); 168 if (myData.get() == NULL) { 169 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR); 170 return U_ILLEGAL_ARGUMENT_ERROR; 171 } 172 173 // Do the conversion. 174 jint* sourceOffset = &myData[0]; 175 jint* targetOffset = &myData[1]; 176 const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset); 177 const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd); 178 UChar* cTarget = uTarget.get() + *targetOffset; 179 const UChar* cTargetLimit = uTarget.get() + targetEnd; 180 UErrorCode errorCode = U_ZERO_ERROR; 181 ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode); 182 *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset; 183 *targetOffset = cTarget - uTarget.get() - *targetOffset; 184 185 // If there was an error, count the problematic bytes. 186 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND) { 187 int8_t invalidByteCount = 32; 188 char invalidBytes[32] = {'\0'}; 189 UErrorCode minorErrorCode = U_ZERO_ERROR; 190 ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode); 191 if (U_SUCCESS(minorErrorCode)) { 192 myData[2] = invalidByteCount; 193 } 194 } 195 196 // Managed code handles some cases; throw all other errors. 197 if (shouldCodecThrow(flush, errorCode)) { 198 maybeThrowIcuException(env, "ucnv_toUnicode", errorCode); 199 } 200 return errorCode; 201} 202 203static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) { 204 UConverter* cnv = toUConverter(address); 205 if (cnv) { 206 ucnv_resetToUnicode(cnv); 207 } 208} 209 210static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) { 211 UConverter* cnv = toUConverter(address); 212 if (cnv) { 213 ucnv_resetFromUnicode(cnv); 214 } 215} 216 217static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) { 218 UConverter* cnv = toUConverter(address); 219 return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1; 220} 221 222static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) { 223 UConverter* cnv = toUConverter(address); 224 return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1; 225} 226 227static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) { 228 UConverter* cnv = toUConverter(address); 229 return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1; 230} 231 232static jboolean NativeConverter_canEncode(JNIEnv*, jclass, jlong address, jint codeUnit) { 233 UErrorCode errorCode = U_ZERO_ERROR; 234 UConverter* cnv = toUConverter(address); 235 if (cnv == NULL) { 236 return JNI_FALSE; 237 } 238 239 UChar srcBuffer[3]; 240 const UChar* src = &srcBuffer[0]; 241 const UChar* srcLimit = (codeUnit < 0x10000) ? &src[1] : &src[2]; 242 243 char dstBuffer[5]; 244 char* dst = &dstBuffer[0]; 245 const char* dstLimit = &dstBuffer[4]; 246 247 int i = 0; 248 UTF_APPEND_CHAR(&srcBuffer[0], i, 2, codeUnit); 249 250 ucnv_fromUnicode(cnv, &dst, dstLimit, &src, srcLimit, NULL, TRUE, &errorCode); 251 return U_SUCCESS(errorCode); 252} 253 254/* 255 * If a charset listed in the IANA Charset Registry is supported by an implementation 256 * of the Java platform then its canonical name must be the name listed in the registry. 257 * Many charsets are given more than one name in the registry, in which case the registry 258 * identifies one of the names as MIME-preferred. If a charset has more than one registry 259 * name then its canonical name must be the MIME-preferred name and the other names in 260 * the registry must be valid aliases. If a supported charset is not listed in the IANA 261 * registry then its canonical name must begin with one of the strings "X-" or "x-". 262 */ 263static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) { 264 UErrorCode status = U_ZERO_ERROR; 265 266 // Check to see if this is a well-known MIME or IANA name. 267 const char* cName = NULL; 268 if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) { 269 return env->NewStringUTF(cName); 270 } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) { 271 return env->NewStringUTF(cName); 272 } 273 274 // Check to see if an alias already exists with "x-" prefix, if yes then 275 // make that the canonical name. 276 int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status); 277 for (int i = 0; i < aliasCount; ++i) { 278 const char* name = ucnv_getAlias(icuCanonicalName, i, &status); 279 if (name != NULL && name[0] == 'x' && name[1] == '-') { 280 return env->NewStringUTF(name); 281 } 282 } 283 284 // As a last resort, prepend "x-" to any alias and make that the canonical name. 285 status = U_ZERO_ERROR; 286 const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status); 287 if (name == NULL && strchr(icuCanonicalName, ',') != NULL) { 288 name = ucnv_getAlias(icuCanonicalName, 1, &status); 289 } 290 // If there is no UTR22 canonical name then just return the original name. 291 if (name == NULL) { 292 name = icuCanonicalName; 293 } 294 UniquePtr<char[]> result(new char[2 + strlen(name) + 1]); 295 strcpy(&result[0], "x-"); 296 strcat(&result[0], name); 297 return env->NewStringUTF(&result[0]); 298} 299 300static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) { 301 int32_t num = ucnv_countAvailable(); 302 jobjectArray result = env->NewObjectArray(num, JniConstants::stringClass, NULL); 303 if (result == NULL) { 304 return NULL; 305 } 306 for (int i = 0; i < num; ++i) { 307 const char* name = ucnv_getAvailableName(i); 308 ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name)); 309 if (javaCanonicalName.get() == NULL) { 310 return NULL; 311 } 312 env->SetObjectArrayElement(result, i, javaCanonicalName.get()); 313 if (env->ExceptionCheck()) { 314 return NULL; 315 } 316 } 317 return result; 318} 319 320static jobjectArray getAliases(JNIEnv* env, const char* icuCanonicalName) { 321 // Get an upper bound on the number of aliases... 322 const char* myEncName = icuCanonicalName; 323 UErrorCode error = U_ZERO_ERROR; 324 size_t aliasCount = ucnv_countAliases(myEncName, &error); 325 if (aliasCount == 0 && myEncName[0] == 'x' && myEncName[1] == '-') { 326 myEncName = myEncName + 2; 327 aliasCount = ucnv_countAliases(myEncName, &error); 328 } 329 if (!U_SUCCESS(error)) { 330 return NULL; 331 } 332 333 // Collect the aliases we want... 334 std::vector<std::string> aliases; 335 for (size_t i = 0; i < aliasCount; ++i) { 336 const char* name = ucnv_getAlias(myEncName, i, &error); 337 if (!U_SUCCESS(error)) { 338 return NULL; 339 } 340 // TODO: why do we ignore these ones? 341 if (strchr(name, '+') == 0 && strchr(name, ',') == 0) { 342 aliases.push_back(name); 343 } 344 } 345 return toStringArray(env, aliases); 346} 347 348static const char* getICUCanonicalName(const char* name) { 349 UErrorCode error = U_ZERO_ERROR; 350 const char* canonicalName = NULL; 351 if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) { 352 return canonicalName; 353 } else if((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) { 354 return canonicalName; 355 } else if((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) { 356 return canonicalName; 357 } else if((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) { 358 /* we have some aliases in the form x-blah .. match those first */ 359 return canonicalName; 360 } else if (strstr(name, "x-") == name) { 361 /* check if the converter can be opened with the name given */ 362 error = U_ZERO_ERROR; 363 UniqueUConverter cnv(ucnv_open(name + 2, &error)); 364 if (cnv.get() != NULL) { 365 return name + 2; 366 } 367 } 368 return NULL; 369} 370 371static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args, 372 const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, 373 UErrorCode* status) { 374 if (!rawContext) { 375 return; 376 } 377 const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 378 switch(reason) { 379 case UCNV_UNASSIGNED: 380 ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status); 381 return; 382 case UCNV_ILLEGAL: 383 case UCNV_IRREGULAR: 384 ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status); 385 return; 386 case UCNV_CLOSE: 387 delete ctx; 388 return; 389 default: 390 *status = U_ILLEGAL_ARGUMENT_ERROR; 391 return; 392 } 393} 394 395static void encoderReplaceCallback(const void* rawContext, 396 UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32, 397 UConverterCallbackReason, UErrorCode * err) { 398 if (rawContext == NULL) { 399 return; 400 } 401 const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 402 *err = U_ZERO_ERROR; 403 ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err); 404} 405 406static UConverterFromUCallback getFromUCallback(int32_t mode) { 407 switch(mode) { 408 case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP; 409 case NativeConverter_REPLACE: return encoderReplaceCallback; 410 case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP; 411 } 412 abort(); 413} 414 415static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address, 416 jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) { 417 UConverter* cnv = toUConverter(address); 418 if (cnv == NULL) { 419 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 420 return; 421 } 422 423 UConverterFromUCallback oldCallback = NULL; 424 const void* oldCallbackContext = NULL; 425 ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext)); 426 427 EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>( 428 reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext)); 429 if (callbackContext == NULL) { 430 callbackContext = new EncoderCallbackContext; 431 } 432 433 callbackContext->onMalformedInput = getFromUCallback(onMalformedInput); 434 callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput); 435 436 ScopedByteArrayRO replacementBytes(env, javaReplacement); 437 if (replacementBytes.get() == NULL) { 438 maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR); 439 return; 440 } 441 memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size()); 442 callbackContext->replacementByteCount = replacementBytes.size(); 443 444 UErrorCode errorCode = U_ZERO_ERROR; 445 ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode); 446 maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode); 447} 448 449static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) { 450 // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is 451 // never true for us. 452 *err = U_ZERO_ERROR; 453} 454 455static void decoderReplaceCallback(const void* rawContext, 456 UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason, 457 UErrorCode* err) { 458 if (!rawContext) { 459 return; 460 } 461 const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 462 *err = U_ZERO_ERROR; 463 ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err); 464} 465 466static UConverterToUCallback getToUCallback(int32_t mode) { 467 switch (mode) { 468 case NativeConverter_IGNORE: return decoderIgnoreCallback; 469 case NativeConverter_REPLACE: return decoderReplaceCallback; 470 case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP; 471 } 472 abort(); 473} 474 475static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args, 476 const char* codeUnits, int32_t length, 477 UConverterCallbackReason reason, UErrorCode* status) { 478 if (!rawContext) { 479 return; 480 } 481 const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 482 switch(reason) { 483 case UCNV_UNASSIGNED: 484 ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status); 485 return; 486 case UCNV_ILLEGAL: 487 case UCNV_IRREGULAR: 488 ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status); 489 return; 490 case UCNV_CLOSE: 491 delete ctx; 492 return; 493 default: 494 *status = U_ILLEGAL_ARGUMENT_ERROR; 495 return; 496 } 497} 498 499static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address, 500 jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) { 501 UConverter* cnv = toUConverter(address); 502 if (cnv == NULL) { 503 maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR); 504 return; 505 } 506 507 UConverterToUCallback oldCallback; 508 const void* oldCallbackContext; 509 ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext); 510 511 DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>( 512 reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext)); 513 if (callbackContext == NULL) { 514 callbackContext = new DecoderCallbackContext; 515 } 516 517 callbackContext->onMalformedInput = getToUCallback(onMalformedInput); 518 callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput); 519 520 ScopedStringChars replacement(env, javaReplacement); 521 if (replacement.get() == NULL) { 522 maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR); 523 return; 524 } 525 u_strncpy(callbackContext->replacementChars, replacement.get(), replacement.size()); 526 callbackContext->replacementCharCount = replacement.size(); 527 528 UErrorCode errorCode = U_ZERO_ERROR; 529 ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode); 530 maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode); 531} 532 533static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) { 534 return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle)); 535} 536 537static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) { 538 UConverter* cnv = toUConverter(address); 539 if (cnv == NULL) { 540 return NULL; 541 } 542 UErrorCode status = U_ZERO_ERROR; 543 char replacementBytes[MAX_REPLACEMENT_LENGTH]; 544 int8_t len = sizeof(replacementBytes); 545 ucnv_getSubstChars(cnv, replacementBytes, &len, &status); 546 if (!U_SUCCESS(status)) { 547 return env->NewByteArray(0); 548 } 549 jbyteArray result = env->NewByteArray(len); 550 if (result == NULL) { 551 return NULL; 552 } 553 env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes)); 554 return result; 555} 556 557static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) { 558 ScopedUtfChars name1Chars(env, name1); 559 if (name1Chars.c_str() == NULL) { 560 return JNI_FALSE; 561 } 562 ScopedUtfChars name2Chars(env, name2); 563 if (name2Chars.c_str() == NULL) { 564 return JNI_FALSE; 565 } 566 567 UErrorCode errorCode = U_ZERO_ERROR; 568 UniqueUConverter converter1(ucnv_open(name1Chars.c_str(), &errorCode)); 569 UnicodeSet set1; 570 ucnv_getUnicodeSet(converter1.get(), set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 571 572 UniqueUConverter converter2(ucnv_open(name2Chars.c_str(), &errorCode)); 573 UnicodeSet set2; 574 ucnv_getUnicodeSet(converter2.get(), set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 575 576 return U_SUCCESS(errorCode) && set1.containsAll(set2); 577} 578 579static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) { 580 ScopedUtfChars charsetNameChars(env, charsetName); 581 if (charsetNameChars.c_str() == NULL) { 582 return NULL; 583 } 584 // Get ICU's canonical name for this charset. 585 const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str()); 586 if (icuCanonicalName == NULL) { 587 return NULL; 588 } 589 // Get Java's canonical name for this charset. 590 jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName); 591 if (env->ExceptionOccurred()) { 592 return NULL; 593 } 594 595 // Check that this charset is supported. 596 // ICU doesn't offer any "isSupported", so we just open and immediately close. 597 // We ignore the UErrorCode because ucnv_open returning NULL is all the information we need. 598 UErrorCode dummy = U_ZERO_ERROR; 599 UniqueUConverter cnv(ucnv_open(icuCanonicalName, &dummy)); 600 if (cnv.get() == NULL) { 601 return NULL; 602 } 603 cnv.reset(); 604 605 // Get the aliases for this charset. 606 jobjectArray aliases = getAliases(env, icuCanonicalName); 607 if (env->ExceptionOccurred()) { 608 return NULL; 609 } 610 611 // Construct the CharsetICU object. 612 jmethodID charsetConstructor = env->GetMethodID(JniConstants::charsetICUClass, "<init>", 613 "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V"); 614 if (env->ExceptionOccurred()) { 615 return NULL; 616 } 617 return env->NewObject(JniConstants::charsetICUClass, charsetConstructor, 618 javaCanonicalName, env->NewStringUTF(icuCanonicalName), aliases); 619} 620 621static JNINativeMethod gMethods[] = { 622 NATIVE_METHOD(NativeConverter, canEncode, "(JI)Z"), 623 NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"), 624 NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"), 625 NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"), 626 NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"), 627 NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"), 628 NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"), 629 NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"), 630 NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"), 631 NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"), 632 NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"), 633 NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"), 634 NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"), 635 NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"), 636 NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"), 637 NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)V"), 638 NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)V"), 639}; 640void register_libcore_icu_NativeConverter(JNIEnv* env) { 641 jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods)); 642} 643