UCharacterCaseTest.java revision aefe4d1f8f1773ead1a52f7a5d2c9e0009353600
/* GENERATED SOURCE. DO NOT MODIFY. */
/**
*******************************************************************************
* Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/


package android.icu.dev.test.lang;


import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import android.icu.dev.test.TestFmwk;
import android.icu.dev.test.TestUtil;
import android.icu.impl.Utility;
import android.icu.lang.UCharacter;
import android.icu.lang.UProperty;
import android.icu.text.BreakIterator;
import android.icu.text.RuleBasedBreakIterator;
import android.icu.text.UTF16;
import android.icu.util.ULocale;


/**
* <p>Testing character casing</p>
* <p>Mostly following the test cases in strcase.cpp for ICU</p>
* <p>Covers single code point case mapping, simple and full case folding,
* locale-sensitive string upper/lower/title casing and the conditional
* mappings listed in SpecialCasing.txt.</p>
* @author Syn Wee Quek
* @since march 14 2002
*/
public final class UCharacterCaseTest extends TestFmwk
{
    // constructor -----------------------------------------------------------

    /**
     * Constructor
     */
    public UCharacterCaseTest()
    {
    }

    // public methods --------------------------------------------------------

    /**
     * Command line entry point: runs this test through the test framework.
     * Any exception escaping the run is printed and otherwise ignored.
     * @param arg arguments handed to the test framework
     */
    public static void main(String[] arg)
    {
        try
        {
            UCharacterCaseTest test = new UCharacterCaseTest();
            test.run(arg);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Testing the uppercase and lowercase function of UCharacter.
     * Walks CHARACTER_LOWER_ and CHARACTER_UPPER_ in parallel (both arrays
     * have the same length) and stops at the first failing pair.
     */
    public void TestCharacter()
    {
        for (int i = 0; i < CHARACTER_LOWER_.length; i ++) {
            if (UCharacter.isLetter(CHARACTER_LOWER_[i]) &&
                !UCharacter.isLowerCase(CHARACTER_LOWER_[i])) {
                errln("FAIL isLowerCase test for \\u" +
                      hex(CHARACTER_LOWER_[i]));
                break;
            }
            // titlecase letters are accepted as "upper" as well
            if (UCharacter.isLetter(CHARACTER_UPPER_[i]) &&
                !(UCharacter.isUpperCase(CHARACTER_UPPER_[i]) ||
                  UCharacter.isTitleCase(CHARACTER_UPPER_[i]))) {
                errln("FAIL isUpperCase test for \\u" +
                      hex(CHARACTER_UPPER_[i]));
                break;
            }
            if (CHARACTER_LOWER_[i] !=
                UCharacter.toLowerCase(CHARACTER_UPPER_[i]) ||
                (CHARACTER_UPPER_[i] !=
                UCharacter.toUpperCase(CHARACTER_LOWER_[i]) &&
                CHARACTER_UPPER_[i] !=
                UCharacter.toTitleCase(CHARACTER_LOWER_[i]))) {
                errln("FAIL case conversion test for \\u" +
                      hex(CHARACTER_UPPER_[i]) +
                      " to \\u" + hex(CHARACTER_LOWER_[i]));
                break;
            }
            if (CHARACTER_LOWER_[i] !=
                UCharacter.toLowerCase(CHARACTER_LOWER_[i])) {
                errln("FAIL lower case conversion test for \\u" +
                      hex(CHARACTER_LOWER_[i]));
                break;
            }
            if (CHARACTER_UPPER_[i] !=
                UCharacter.toUpperCase(CHARACTER_UPPER_[i]) &&
                CHARACTER_UPPER_[i] !=
                UCharacter.toTitleCase(CHARACTER_UPPER_[i])) {
                errln("FAIL upper case conversion test for \\u" +
                      hex(CHARACTER_UPPER_[i]));
                break;
            }
            logln("Ok    \\u" + hex(CHARACTER_UPPER_[i]) + " and \\u" +
                  hex(CHARACTER_LOWER_[i]));
        }
    }

    /**
     * Testing case folding of single code points (FOLDING_SIMPLE_) and of
     * whole strings (FOLDING_MIXED_), each with the boolean and the int
     * option flavours of UCharacter.foldCase.
     */
    public void TestFolding()
    {
        // test simple case folding
        // FOLDING_SIMPLE_ holds triples: input, default result,
        // exclude-special-i result
        for (int i = 0; i < FOLDING_SIMPLE_.length; i += 3) {
            if (UCharacter.foldCase(FOLDING_SIMPLE_[i], true) !=
                FOLDING_SIMPLE_[i + 1]) {
                errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
                      ", true) should be \\u" + hex(FOLDING_SIMPLE_[i + 1]));
            }
            if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
                                    UCharacter.FOLD_CASE_DEFAULT) !=
                                                      FOLDING_SIMPLE_[i + 1]) {
                errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
                      ", UCharacter.FOLD_CASE_DEFAULT) should be \\u"
                      + hex(FOLDING_SIMPLE_[i + 1]));
            }
            if (UCharacter.foldCase(FOLDING_SIMPLE_[i], false) !=
                FOLDING_SIMPLE_[i + 2]) {
                errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
                      ", false) should be \\u" + hex(FOLDING_SIMPLE_[i + 2]));
            }
            if (UCharacter.foldCase(FOLDING_SIMPLE_[i],
                                    UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) !=
                                    FOLDING_SIMPLE_[i + 2]) {
                errln("FAIL: foldCase(\\u" + hex(FOLDING_SIMPLE_[i]) +
                      ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) should be \\u"
                      + hex(FOLDING_SIMPLE_[i + 2]));
            }
        }

        // Test full string case folding with default option and separate
        // buffers
        if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], true))) {
            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
                  ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], true)) +
                  " should be " + prettify(FOLDING_DEFAULT_[0]));
        }

        if (!FOLDING_DEFAULT_[0].equals(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))) {
            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
                    ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_DEFAULT))
                    + " should be " + prettify(FOLDING_DEFAULT_[0]));
        }

        if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
                            UCharacter.foldCase(FOLDING_MIXED_[0], false))) {
            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
                  ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], false))
                  + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
        }

        if (!FOLDING_EXCLUDE_SPECIAL_I_[0].equals(
                                    UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[0]) +
                  ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[0], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
                  + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[0]));
        }

        if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], true))) {
           errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
                 ", true)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], true))
                 + " should be " + prettify(FOLDING_DEFAULT_[1]));
        }

        if (!FOLDING_DEFAULT_[1].equals(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))) {
            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
                         ", UCharacter.FOLD_CASE_DEFAULT)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_DEFAULT))
                         + " should be " + prettify(FOLDING_DEFAULT_[1]));
        }

        // alternate handling for dotted I/dotless i (U+0130, U+0131)
        if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
                        UCharacter.foldCase(FOLDING_MIXED_[1], false))) {
            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
                  ", false)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], false))
                  + " should be " + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
        }

        if (!FOLDING_EXCLUDE_SPECIAL_I_[1].equals(
                                UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))) {
            errln("FAIL: foldCase(" + prettify(FOLDING_MIXED_[1]) +
                  ", UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I)=" + prettify(UCharacter.foldCase(FOLDING_MIXED_[1], UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I))
                  + " should be "
                  + prettify(FOLDING_EXCLUDE_SPECIAL_I_[1]));
        }
    }

    /**
     * Testing the strings case mapping methods
     */
    public void TestUpper()
    {
        // uppercase with root locale and in the same buffer
        if (!UPPER_ROOT_.equals(UCharacter.toUpperCase(UPPER_BEFORE_))) {
            errln("Fail " + UPPER_BEFORE_ + " after uppercase should be " +
                  UPPER_ROOT_ + " instead got " +
                  UCharacter.toUpperCase(UPPER_BEFORE_));
        }

        // uppercase with turkish locale and separate buffers
        if (!UPPER_TURKISH_.equals(UCharacter.toUpperCase(TURKISH_LOCALE_,
                                                         UPPER_BEFORE_))) {
            errln("Fail " + UPPER_BEFORE_ +
                  " after turkish-sensitive uppercase should be " +
                  UPPER_TURKISH_ + " instead of " +
                  UCharacter.toUpperCase(TURKISH_LOCALE_, UPPER_BEFORE_));
        }

        // uppercase a short string with root locale
        if (!UPPER_MINI_UPPER_.equals(UCharacter.toUpperCase(UPPER_MINI_))) {
            errln("error in toUpper(root locale)=\"" + UPPER_MINI_ +
                  "\" expected \"" + UPPER_MINI_UPPER_ + "\"");
        }

        if (!SHARED_UPPERCASE_TOPKAP_.equals(
                       UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_))) {
            errln("toUpper failed: expected \"" +
                  SHARED_UPPERCASE_TOPKAP_ + "\", got \"" +
                  UCharacter.toUpperCase(SHARED_LOWERCASE_TOPKAP_) + "\".");
        }

        if (!SHARED_UPPERCASE_TURKISH_.equals(
                  UCharacter.toUpperCase(TURKISH_LOCALE_,
                                         SHARED_LOWERCASE_TOPKAP_))) {
            errln("toUpper failed: expected \"" +
                  SHARED_UPPERCASE_TURKISH_ + "\", got \"" +
                  UCharacter.toUpperCase(TURKISH_LOCALE_,
                                     SHARED_LOWERCASE_TOPKAP_) + "\".");
        }

        if (!SHARED_UPPERCASE_GERMAN_.equals(
                UCharacter.toUpperCase(GERMAN_LOCALE_,
                                       SHARED_LOWERCASE_GERMAN_))) {
            errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GERMAN_
                  + "\", got \"" + UCharacter.toUpperCase(GERMAN_LOCALE_,
                                        SHARED_LOWERCASE_GERMAN_) + "\".");
        }

        // this check uppercases, so the failure is reported as toUpper
        if (!SHARED_UPPERCASE_GREEK_.equals(
                UCharacter.toUpperCase(SHARED_LOWERCASE_GREEK_))) {
            errln("toUpper failed: expected \"" + SHARED_UPPERCASE_GREEK_ +
                  "\", got \"" + UCharacter.toUpperCase(
                                        SHARED_LOWERCASE_GREEK_) + "\".");
        }
    }

    /**
     * Testing string lowercasing without an explicit locale and with the
     * Turkish and Greek locales.
     */
    public void TestLower()
    {
        if (!LOWER_ROOT_.equals(UCharacter.toLowerCase(LOWER_BEFORE_))) {
            errln("Fail " + LOWER_BEFORE_ + " after lowercase should be " +
                  LOWER_ROOT_ + " instead of " +
                  UCharacter.toLowerCase(LOWER_BEFORE_));
        }

        // lowercase with turkish locale
        if (!LOWER_TURKISH_.equals(UCharacter.toLowerCase(TURKISH_LOCALE_,
                                                          LOWER_BEFORE_))) {
            errln("Fail " + LOWER_BEFORE_ +
                  " after turkish-sensitive lowercase should be " +
                  LOWER_TURKISH_ + " instead of " +
                  UCharacter.toLowerCase(TURKISH_LOCALE_, LOWER_BEFORE_));
        }
        if (!SHARED_LOWERCASE_ISTANBUL_.equals(
                     UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_))) {
            errln("1. toLower failed: expected \"" +
                  SHARED_LOWERCASE_ISTANBUL_ + "\", got \"" +
              UCharacter.toLowerCase(SHARED_UPPERCASE_ISTANBUL_) + "\".");
        }

        if (!SHARED_LOWERCASE_TURKISH_.equals(
                UCharacter.toLowerCase(TURKISH_LOCALE_,
                                       SHARED_UPPERCASE_ISTANBUL_))) {
            errln("2. toLower failed: expected \"" +
                  SHARED_LOWERCASE_TURKISH_ + "\", got \"" +
                  UCharacter.toLowerCase(TURKISH_LOCALE_,
                                SHARED_UPPERCASE_ISTANBUL_) + "\".");
        }
        if (!SHARED_LOWERCASE_GREEK_.equals(
                UCharacter.toLowerCase(GREEK_LOCALE_,
                                       SHARED_UPPERCASE_GREEK_))) {
            errln("toLower failed: expected \"" + SHARED_LOWERCASE_GREEK_ +
                  "\", got \"" + UCharacter.toLowerCase(GREEK_LOCALE_,
                                        SHARED_UPPERCASE_GREEK_) + "\".");
        }
    }

    /**
     * Regression check: the ASCII apostrophe must have the Case_Ignorable
     * property, and English titlecasing must not capitalize the letter
     * following an apostrophe inside a word.
     */
    public void TestTitleRegression() throws java.io.IOException {
        boolean isIgnorable = UCharacter.hasBinaryProperty('\'', UProperty.CASE_IGNORABLE);
        assertTrue("Case Ignorable check of ASCII apostrophe", isIgnorable);
        assertEquals("Titlecase check",
                "The Quick Brown Fox Can't Jump Over The Lazy Dogs.",
                UCharacter.toTitleCase(ULocale.ENGLISH, "THE QUICK BROWN FOX CAN'T JUMP OVER THE LAZY DOGS.", null));
    }

    /**
     * Testing titlecasing driven by TITLE_DATA_, which is consumed five
     * strings at a time: input, expected result, locale ID, break iterator
     * kind and option letters.
     * Any exception is reported as a warning only.
     */
    public void TestTitle()
    {
         try{
            for (int i = 0; i < TITLE_DATA_.length;) {
                String test = TITLE_DATA_[i++];
                String expected = TITLE_DATA_[i++];
                ULocale locale = new ULocale(TITLE_DATA_[i++]);
                int breakType = Integer.parseInt(TITLE_DATA_[i++]);
                String optionsString = TITLE_DATA_[i++];
                BreakIterator iter =
                    breakType >= 0 ?
                        BreakIterator.getBreakInstance(locale, breakType) :
                        breakType == -2 ?
                            // Open a trivial break iterator that only delivers { 0, length }
                            // or even just { 0 } as boundaries.
                            new RuleBasedBreakIterator(".*;") :
                            null;
                int options = 0;
                if (optionsString.indexOf('L') >= 0) {
                    options |= UCharacter.TITLECASE_NO_LOWERCASE;
                }
                if (optionsString.indexOf('A') >= 0) {
                    options |= UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;
                }
                String result = UCharacter.toTitleCase(locale, test, iter, options);
                if (!expected.equals(result)) {
                    errln("titlecasing for " + prettify(test) + " (options " + options + ") should be " +
                          prettify(expected) + " but got " +
                          prettify(result));
                }
                // without options the three-argument overload is checked too
                if (options == 0) {
                    result = UCharacter.toTitleCase(locale, test, iter);
                    if (!expected.equals(result)) {
                        errln("titlecasing for " + prettify(test) + " should be " +
                              prettify(expected) + " but got " +
                              prettify(result));
                    }
                }
            }
         }catch(Exception ex){
            warnln("Could not find data for BreakIterators");
         }
    }

    /**
     * Testing titlecasing of a word-initial "ij": expected to become "IJ"
     * for the Dutch locale and "Ij" for English, with both ULocale and
     * java.util.Locale arguments.
     */
    public void TestDutchTitle() {
        ULocale LOC_DUTCH = new ULocale("nl");
        int options = 0;
        options |= UCharacter.TITLECASE_NO_LOWERCASE;
        BreakIterator iter = BreakIterator.getWordInstance(LOC_DUTCH);

        assertEquals("Dutch titlecase check in English",
                "Ijssel Igloo Ijmuiden",
                UCharacter.toTitleCase(ULocale.ENGLISH, "ijssel igloo IJMUIDEN", null));

        assertEquals("Dutch titlecase check in Dutch",
                "IJssel Igloo IJmuiden",
                UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null));

        // Also check the behavior using Java Locale
        Locale JAVALOC_DUTCH = new Locale("nl");
        assertEquals("Dutch titlecase check in English (Java Locale)",
                "Ijssel Igloo Ijmuiden",
                UCharacter.toTitleCase(Locale.ENGLISH, "ijssel igloo IJMUIDEN", null));

        assertEquals("Dutch titlecase check in Dutch (Java Locale)",
                "IJssel Igloo IJmuiden",
                UCharacter.toTitleCase(JAVALOC_DUTCH, "ijssel igloo IJMUIDEN", null));

        iter.setText("ijssel igloo IjMUIdEN iPoD ijenough");
        assertEquals("Dutch titlecase check in Dutch with nolowercase option",
                "IJssel Igloo IJMUIdEN IPoD IJenough",
                UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IjMUIdEN iPoD ijenough", iter, options));
    }

    /**
     * Testing special case mappings: the SPECIAL_DATA_ triples (input,
     * expected lowercase, expected uppercase) paired with SPECIAL_LOCALES_,
     * followed by the Turkish and Lithuanian dot handling strings.
     */
    public void TestSpecial()
    {
        for (int i = 0; i < SPECIAL_LOCALES_.length; i ++) {
            int    j      = i * 3;
            Locale locale = SPECIAL_LOCALES_[i];
            String str    = SPECIAL_DATA_[j];
            // Each mapping is computed once so that the failure message
            // shows exactly the value that was compared.
            if (locale != null) {
                String lowered = UCharacter.toLowerCase(locale, str);
                if (!SPECIAL_DATA_[j + 1].equals(lowered)) {
                    errln("error lowercasing special characters " +
                        hex(str) + " expected " + hex(SPECIAL_DATA_[j + 1])
                        + " for locale " + locale.toString() + " but got " +
                        hex(lowered));
                }
                String uppered = UCharacter.toUpperCase(locale, str);
                if (!SPECIAL_DATA_[j + 2].equals(uppered)) {
                    errln("error uppercasing special characters " +
                        hex(str) + " expected " + hex(SPECIAL_DATA_[j + 2])
                        + " for locale " + locale.toString() + " but got " +
                        hex(uppered));
                }
            }
            else {
                String lowered = UCharacter.toLowerCase(str);
                if (!SPECIAL_DATA_[j + 1].equals(lowered)) {
                    errln("error lowercasing special characters " +
                        hex(str) + " expected " + hex(SPECIAL_DATA_[j + 1]) +
                        " but got " +
                        hex(lowered));
                }
                // locale is null here; the Locale overload is kept on
                // purpose so that its null handling stays exercised
                String uppered = UCharacter.toUpperCase(locale, str);
                if (!SPECIAL_DATA_[j + 2].equals(uppered)) {
                    errln("error uppercasing special characters " +
                        hex(str) + " expected " + hex(SPECIAL_DATA_[j + 2]) +
                        " but got " +
                        hex(uppered));
                }
            }
        }

        // turkish & azerbaijani dotless i & dotted I
        // remove dot above if there was a capital I before and there are no
        // more accents above
        if (!SPECIAL_DOTTED_LOWER_TURKISH_.equals(UCharacter.toLowerCase(
                                        TURKISH_LOCALE_, SPECIAL_DOTTED_))) {
            errln("error in dots.toLower(tr)=\"" + SPECIAL_DOTTED_ +
                  "\" expected \"" + SPECIAL_DOTTED_LOWER_TURKISH_ +
                  "\" but got " + UCharacter.toLowerCase(TURKISH_LOCALE_,
                                                         SPECIAL_DOTTED_));
        }
        if (!SPECIAL_DOTTED_LOWER_GERMAN_.equals(UCharacter.toLowerCase(
                                             GERMAN_LOCALE_, SPECIAL_DOTTED_))) {
            errln("error in dots.toLower(de)=\"" + SPECIAL_DOTTED_ +
                  "\" expected \"" + SPECIAL_DOTTED_LOWER_GERMAN_ +
                  "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
                                                         SPECIAL_DOTTED_));
        }

        // lithuanian dot above in uppercasing
        if (!SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_.equals(
             UCharacter.toUpperCase(LITHUANIAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
            errln("error in dots.toUpper(lt)=\"" + SPECIAL_DOT_ABOVE_ +
                  "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ +
                  "\" but got " + UCharacter.toUpperCase(LITHUANIAN_LOCALE_,
                                                         SPECIAL_DOT_ABOVE_));
        }
        if (!SPECIAL_DOT_ABOVE_UPPER_GERMAN_.equals(UCharacter.toUpperCase(
                                        GERMAN_LOCALE_, SPECIAL_DOT_ABOVE_))) {
            errln("error in dots.toUpper(de)=\"" + SPECIAL_DOT_ABOVE_ +
                  "\" expected \"" + SPECIAL_DOT_ABOVE_UPPER_GERMAN_ +
                  "\" but got " + UCharacter.toUpperCase(GERMAN_LOCALE_,
                                                         SPECIAL_DOT_ABOVE_));
        }

        // lithuanian adds dot above to i in lowercasing if there are more
        // above accents
        if (!SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_.equals(
            UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
                                   SPECIAL_DOT_ABOVE_UPPER_))) {
            errln("error in dots.toLower(lt)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
                  "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ +
                  "\" but got " + UCharacter.toLowerCase(LITHUANIAN_LOCALE_,
                                                   SPECIAL_DOT_ABOVE_UPPER_));
        }
        if (!SPECIAL_DOT_ABOVE_LOWER_GERMAN_.equals(
            UCharacter.toLowerCase(GERMAN_LOCALE_,
                                   SPECIAL_DOT_ABOVE_UPPER_))) {
            errln("error in dots.toLower(de)=\"" + SPECIAL_DOT_ABOVE_UPPER_ +
                  "\" expected \"" + SPECIAL_DOT_ABOVE_LOWER_GERMAN_ +
                  "\" but got " + UCharacter.toLowerCase(GERMAN_LOCALE_,
                                                   SPECIAL_DOT_ABOVE_UPPER_));
        }
    }

    /**
     * Tests for case mapping in the file SpecialCasing.txt
     * This method reads in SpecialCasing.txt file for testing purposes.
     * A default path is provided relative to the src path, however the user
     * could set a system property to change the directory path.<br>
     * e.g. java -DUnicodeData="data_dir_path" com.ibm.dev.test.lang.UCharacterTest
     * <p>For every data line the conditions following the mappings are
     * turned into surrounding context characters before the mappings are
     * checked. Exceptions (for instance a missing data file) are printed
     * and do not fail the test; the reader is always closed.</p>
     */
    public void TestSpecialCasingTxt()
    {
        BufferedReader input = null;
        try
        {
            // reading in the SpecialCasing file
            input = TestUtil.getDataReader("unicode/SpecialCasing.txt");
            while (true)
            {
                String s = input.readLine();
                if (s == null) {
                    break;
                }
                // skip empty lines and comment lines
                if (s.length() == 0 || s.charAt(0) == '#') {
                    continue;
                }

                // chstr[0] = code, [1] = lower, [2] = title, [3] = upper,
                // [4..] = locale and/or condition names
                String chstr[] = getUnicodeStrings(s);
                StringBuffer strbuffer   = new StringBuffer(chstr[0]);
                StringBuffer lowerbuffer = new StringBuffer(chstr[1]);
                StringBuffer upperbuffer = new StringBuffer(chstr[3]);
                Locale locale = null;
                for (int i = 4; i < chstr.length; i ++) {
                    String condition = chstr[i];
                    if (Character.isLowerCase(chstr[i].charAt(0))) {
                        // specified locale
                        locale = new Locale(chstr[i], "");
                    }
                    else if (condition.compareToIgnoreCase("Not_Before_Dot")
                                                      == 0) {
                        // turns I into dotless i
                    }
                    else if (condition.compareToIgnoreCase(
                                                      "More_Above") == 0) {
                            strbuffer.append((char)0x300);
                            lowerbuffer.append((char)0x300);
                            upperbuffer.append((char)0x300);
                    }
                    else if (condition.compareToIgnoreCase(
                                                "After_Soft_Dotted") == 0) {
                            strbuffer.insert(0, 'i');
                            lowerbuffer.insert(0, 'i');
                            String lang = "";
                            if (locale != null) {
                                lang = locale.getLanguage();
                            }
                            if (lang.equals("tr") || lang.equals("az")) {
                                // this is to be removed when 4.0 data comes out
                                // and upperbuffer.insert uncommented
                                // see jitterbug 2344
                                // Undo the inserted 'i' and re-run this slot
                                // as an After_I condition: the decrement
                                // cancels the loop increment.
                                chstr[i] = "After_I";
                                strbuffer.deleteCharAt(0);
                                lowerbuffer.deleteCharAt(0);
                                i --;
                                continue;
                                // upperbuffer.insert(0, '\u0130');
                            }
                            else {
                                upperbuffer.insert(0, 'I');
                            }
                    }
                    else if (condition.compareToIgnoreCase(
                                                      "Final_Sigma") == 0) {
                            strbuffer.insert(0, 'c');
                            lowerbuffer.insert(0, 'c');
                            upperbuffer.insert(0, 'C');
                    }
                    else if (condition.compareToIgnoreCase("After_I") == 0) {
                            strbuffer.insert(0, 'I');
                            lowerbuffer.insert(0, 'i');
                            String lang = "";
                            if (locale != null) {
                                lang = locale.getLanguage();
                            }
                            if (lang.equals("tr") || lang.equals("az")) {
                                upperbuffer.insert(0, 'I');
                            }
                    }
                }
                chstr[0] = strbuffer.toString();
                chstr[1] = lowerbuffer.toString();
                chstr[3] = upperbuffer.toString();
                if (locale == null) {
                    if (!UCharacter.toLowerCase(chstr[0]).equals(chstr[1])) {
                        errln(s);
                        errln("Fail: toLowerCase for character " +
                              Utility.escape(chstr[0]) + ", expected "
                              + Utility.escape(chstr[1]) + " but resulted in " +
                              Utility.escape(UCharacter.toLowerCase(chstr[0])));
                    }
                    if (!UCharacter.toUpperCase(chstr[0]).equals(chstr[3])) {
                        errln(s);
                        errln("Fail: toUpperCase for character " +
                              Utility.escape(chstr[0]) + ", expected "
                              + Utility.escape(chstr[3]) + " but resulted in " +
                              Utility.escape(UCharacter.toUpperCase(chstr[0])));
                    }
                }
                else {
                    if (!UCharacter.toLowerCase(locale, chstr[0]).equals(
                                                                   chstr[1])) {
                        errln(s);
                        errln("Fail: toLowerCase for character " +
                              Utility.escape(chstr[0]) + ", expected "
                              + Utility.escape(chstr[1]) + " but resulted in " +
                              Utility.escape(UCharacter.toLowerCase(locale,
                                                                    chstr[0])));
                    }
                    if (!UCharacter.toUpperCase(locale, chstr[0]).equals(
                                                                   chstr[3])) {
                        errln(s);
                        errln("Fail: toUpperCase for character " +
                              Utility.escape(chstr[0]) + ", expected "
                              + Utility.escape(chstr[3]) + " but resulted in " +
                              Utility.escape(UCharacter.toUpperCase(locale,
                                                                    chstr[0])));
                    }
                }
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            // release the data file even when reading or checking failed
            if (input != null) {
                try {
                    input.close();
                }
                catch (java.io.IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Testing single character upper/lower conversions: letterlike symbols
     * that must map to themselves, a table of explicit pairs, and two
     * parallel ASCII strings.
     */
    public void TestUpperLower()
    {
        int upper[] = {0x0041, 0x0042, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8,
                        0x01c9, 0x000c};
        int lower[] = {0x0061, 0x0062, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9,
                        0x01c9, 0x000c};
        // upperTest is the input for uppercasing (hence lowercase letters),
        // lowerTest is the input for lowercasing
        String upperTest = "abcdefg123hij.?:klmno";
        String lowerTest = "ABCDEFG123HIJ.?:KLMNO";

        // Checks LetterLike Symbols which were previously a source of
        // confusion [Bertrand A. D. 02/04/98]
        for (int i = 0x2100; i < 0x2138; i ++) {
            /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
            if (i != 0x2126 && i != 0x212a && i != 0x212b && i!=0x2132) {
                if (i != UCharacter.toLowerCase(i)) { // itself
                    errln("Failed case conversion with itself: \\u"
                            + Utility.hex(i, 4));
                }
                if (i != UCharacter.toUpperCase(i)) {
                    errln("Failed case conversion with itself: \\u"
                            + Utility.hex(i, 4));
                }
            }
        }
        for (int i = 0; i < upper.length; i ++) {
            if (UCharacter.toLowerCase(upper[i]) != lower[i]) {
                errln("FAILED UCharacter.tolower() for \\u"
                        + Utility.hex(upper[i], 4)
                        + " Expected \\u" + Utility.hex(lower[i], 4)
                        + " Got \\u"
                        + Utility.hex(UCharacter.toLowerCase(upper[i]), 4));
            }
        }
        logln("testing upper lower");
        for (int i = 0; i < upperTest.length(); i ++) {
            logln("testing to upper to lower");
            if (UCharacter.isLetter(upperTest.charAt(i)) &&
                !UCharacter.isLowerCase(upperTest.charAt(i))) {
                errln("Failed isLowerCase test at \\u"
                        + Utility.hex(upperTest.charAt(i), 4));
            }
            else if (UCharacter.isLetter(lowerTest.charAt(i))
                     && !UCharacter.isUpperCase(lowerTest.charAt(i))) {
                errln("Failed isUpperCase test at \\u"
                      + Utility.hex(lowerTest.charAt(i), 4));
            }
            else if (upperTest.charAt(i)
                            != UCharacter.toLowerCase(lowerTest.charAt(i))) {
                errln("Failed case conversion from \\u"
                        + Utility.hex(lowerTest.charAt(i), 4) + " To \\u"
                        + Utility.hex(upperTest.charAt(i), 4));
            }
            else if (lowerTest.charAt(i)
                    != UCharacter.toUpperCase(upperTest.charAt(i))) {
                errln("Failed case conversion : \\u"
                        + Utility.hex(upperTest.charAt(i), 4) + " To \\u"
                        + Utility.hex(lowerTest.charAt(i), 4));
            }
            else if (upperTest.charAt(i)
                    != UCharacter.toLowerCase(upperTest.charAt(i))) {
                errln("Failed case conversion with itself: \\u"
                        + Utility.hex(upperTest.charAt(i)));
            }
            else if (lowerTest.charAt(i)
                    != UCharacter.toUpperCase(lowerTest.charAt(i))) {
                errln("Failed case conversion with itself: \\u"
                        + Utility.hex(lowerTest.charAt(i)));
            }
        }
        logln("done testing upper Lower");
    }

    // private data members - test data --------------------------------------

    private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
    private static final Locale GERMAN_LOCALE_ = new Locale("de", "DE");
    private static final Locale GREEK_LOCALE_ = new Locale("el", "GR");
    private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US");
    private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT");

    /**
     * Upper (or title) case code points; entry i pairs with entry i of
     * CHARACTER_LOWER_.
     */
    private static final int CHARACTER_UPPER_[] =
                      {0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
                       0x00b1, 0x00b2, 0xb3, 0x0048, 0x0049, 0x004a, 0x002e,
                       0x003f, 0x003a, 0x004b, 0x004c, 0x4d, 0x004e, 0x004f,
                       0x01c4, 0x01c8, 0x000c, 0x0000};
    /**
     * Lower case code points; entry i pairs with entry i of
     * CHARACTER_UPPER_.
     */
    private static final int CHARACTER_LOWER_[] =
                      {0x61, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
                       0x00b1, 0x00b2, 0xb3, 0x0068, 0x0069, 0x006a, 0x002e,
                       0x003f, 0x003a, 0x006b, 0x006c, 0x6d, 0x006e, 0x006f,
                       0x01c6, 0x01c9, 0x000c, 0x0000};

    /*
     * CaseFolding.txt says about i and its cousins:
     *   0049; C; 0069; # LATIN CAPITAL LETTER I
     *   0049; T; 0131; # LATIN CAPITAL LETTER I
     *
     *   0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
     *   0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
     * That's all.
     * See CaseFolding.txt and the Unicode Standard for how to apply the case foldings.
     */
    private static final int FOLDING_SIMPLE_[] = {
        // input, default, exclude special i
        0x61,   0x61,  0x61,
        0x49,   0x69,  0x131,
        0x130,  0x130, 0x69,
        0x131,  0x131, 0x131,
        0xdf,   0xdf,  0xdf,
        0xfb03, 0xfb03, 0xfb03,
        0x1040e,0x10436,0x10436,
        0x5ffff,0x5ffff,0x5ffff
    };
    private static final String FOLDING_MIXED_[] =
                          {"\u0061\u0042\u0130\u0049\u0131\u03d0\u00df\ufb03\ud93f\udfff",
                           "A\u00df\u00b5\ufb03\uD801\uDC0C\u0130\u0131"};
    private static final String FOLDING_DEFAULT_[] =
         {"\u0061\u0062\u0069\u0307\u0069\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
          "ass\u03bcffi\uD801\uDC34i\u0307\u0131"};
    private static final String FOLDING_EXCLUDE_SPECIAL_I_[] =
         {"\u0061\u0062\u0069\u0131\u0131\u03b2\u0073\u0073\u0066\u0066\u0069\ud93f\udfff",
          "ass\u03bcffi\uD801\uDC34i\u0131"};
    /**
     * "IESUS CHRISTOS"
     */
    private static final String SHARED_UPPERCASE_GREEK_ =
        "\u0399\u0395\u03a3\u03a5\u03a3\u0020\u03a7\u03a1\u0399\u03a3\u03a4\u039f\u03a3";
    /**
     * "iesus christos"
     */
    private static final String SHARED_LOWERCASE_GREEK_ =
        "\u03b9\u03b5\u03c3\u03c5\u03c2\u0020\u03c7\u03c1\u03b9\u03c3\u03c4\u03bf\u03c2";
    private static final String SHARED_LOWERCASE_TURKISH_ =
        "\u0069\u0073\u0074\u0061\u006e\u0062\u0075\u006c\u002c\u0020\u006e\u006f\u0074\u0020\u0063\u006f\u006e\u0073\u0074\u0061\u006e\u0074\u0131\u006e\u006f\u0070\u006c\u0065\u0021";
    private static final String SHARED_UPPERCASE_TURKISH_ =
        "\u0054\u004f\u0050\u004b\u0041\u0050\u0049\u0020\u0050\u0041\u004c\u0041\u0043\u0045\u002c\u0020\u0130\u0053\u0054\u0041\u004e\u0042\u0055\u004c";
    private static final String SHARED_UPPERCASE_ISTANBUL_ =
                                          "\u0130STANBUL, NOT CONSTANTINOPLE!";
    private static final String SHARED_LOWERCASE_ISTANBUL_ =
                                          "i\u0307stanbul, not constantinople!";
    private static final String SHARED_LOWERCASE_TOPKAP_ =
                                          "topkap\u0131 palace, istanbul";
    private static final String SHARED_UPPERCASE_TOPKAP_ =
                                          "TOPKAPI PALACE, ISTANBUL";
    private static final String SHARED_LOWERCASE_GERMAN_ =
                                          "S\u00FC\u00DFmayrstra\u00DFe";
    private static final String SHARED_UPPERCASE_GERMAN_ =
                                          "S\u00DCSSMAYRSTRASSE";

    private static final String UPPER_BEFORE_ =
         "\u0061\u0042\u0069\u03c2\u00df\u03c3\u002f\ufb03\ufb03\ufb03\ud93f\udfff";
    private static final String UPPER_ROOT_ =
         "\u0041\u0042\u0049\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
    private static final String UPPER_TURKISH_ =
         "\u0041\u0042\u0130\u03a3\u0053\u0053\u03a3\u002f\u0046\u0046\u0049\u0046\u0046\u0049\u0046\u0046\u0049\ud93f\udfff";
    private static final String UPPER_MINI_ = "\u00df\u0061";
    private static final String UPPER_MINI_UPPER_ = "\u0053\u0053\u0041";

    private static final String LOWER_BEFORE_ =
                      "\u0061\u0042\u0049\u03a3\u00df\u03a3\u002f\ud93f\udfff";
    private static final String LOWER_ROOT_ =
                      "\u0061\u0062\u0069\u03c3\u00df\u03c2\u002f\ud93f\udfff";
    private static final String LOWER_TURKISH_ =
                      "\u0061\u0062\u0131\u03c3\u00df\u03c2\u002f\ud93f\udfff";

    /**
     * each item is an array with input string, result string, locale ID, break iterator, options
     * the break iterator is specified as an int, same as in BreakIterator.KIND_*:
     * 0=KIND_CHARACTER  1=KIND_WORD  2=KIND_LINE  3=KIND_SENTENCE  4=KIND_TITLE  -1=default (NULL=words)  -2=no breaks (.*)
     * options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I  L=U_TITLECASE_NO_LOWERCASE  A=U_TITLECASE_NO_BREAK_ADJUSTMENT
     * see ICU4C source/test/testdata/casing.txt
     */
    private static final String TITLE_DATA_[] = {
        "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
        "\u0041\u0042\u0020\u0049\u03a3\u0020\u0053\u0073\u03a3\u002f\u0046\u0066\u0069\ud93f\udfff",
        "",
        "0",
        "",

        "\u0061\u0042\u0020\u0069\u03c2\u0020\u00df\u03c3\u002f\ufb03\ud93f\udfff",
        "\u0041\u0062\u0020\u0049\u03c2\u0020\u0053\u0073\u03c3\u002f\u0046\u0066\u0069\ud93f\udfff",
        "",
        "1",
        "",

        "\u02bbaMeLikA huI P\u016b \u02bb\u02bb\u02bbiA", "\u02bbAmelika Hui P\u016b \u02bb\u02bb\u02bbIa", // titlecase first _cased_ letter, j4933
        "",
        "-1",
        "",

        " tHe QUIcK bRoWn", " The Quick Brown",
        "",
        "4",
        "",

        "\u01c4\u01c5\u01c6\u01c7\u01c8\u01c9\u01ca\u01cb\u01cc",
        "\u01c5\u01c5\u01c5\u01c8\u01c8\u01c8\u01cb\u01cb\u01cb", // UBRK_CHARACTER
        "",
        "0",
        "",

        "\u01c9ubav ljubav", "\u01c8ubav Ljubav", // Lj vs. L+j
        "",
        "-1",
        "",

        "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'",  "'Oh Don't Titlecase After Letter+'",
        "",
        "-1",
        "",

        "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
        "A \u02bbCat. A \u02bbDog! \u02bbEtc.",
        "",
        "-1",
        "", // default

        "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
        "A \u02bbcat. A \u02bbdog! \u02bbetc.",
        "",
        "-1",
        "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT

        "a \u02bbCaT. A \u02bbdOg! \u02bbeTc.",
        "A \u02bbCaT. A \u02bbdOg! \u02bbETc.",
        "",
        "3",
        "L", // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE


        "\u02bbcAt! \u02bbeTc.",
        "\u02bbCat! \u02bbetc.",
        "",
        "-2",
        "", // -2=Trivial break iterator

        "\u02bbcAt! \u02bbeTc.",
        "\u02bbcat! \u02bbetc.",
        "",
        "-2",
        "A", // U_TITLECASE_NO_BREAK_ADJUSTMENT

        "\u02bbcAt! \u02bbeTc.",
        "\u02bbCAt! \u02bbeTc.",
        "",
        "-2",
        "L", // U_TITLECASE_NO_LOWERCASE

        "\u02bbcAt! \u02bbeTc.",
        "\u02bbcAt! \u02bbeTc.",
        "",
        "-2",
        "AL", // Both options

        // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError
        // when TITLECASE_NO_LOWERCASE encounters a single-letter word
        "a b c",
        "A B C",
        "",
        "1",
        "L" // U_TITLECASE_NO_LOWERCASE
    };


    /**
     * <p>Triples of basic string, lower string, upper string; triple i is
     * tested with the locale at index i of SPECIAL_LOCALES_</p>
     */
    private static final String SPECIAL_DATA_[] = {
        UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
        UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
        UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
        "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " +
                         UTF16.valueOf(0x1043C) + UTF16.valueOf(0x10414),
        "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " +
                              UTF16.valueOf(0x1043C) + UTF16.valueOf(0x1043C),
        "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " +
                              UTF16.valueOf(0x10414) + UTF16.valueOf(0x10414),
        // sigmas followed/preceded by cased letters
        "i\u0307\u03a3\u0308j \u0307\u03a3\u0308j i\u00ad\u03a3\u0308 \u0307\u03a3\u0308 ",
        "i\u0307\u03c3\u0308j \u0307\u03c3\u0308j i\u00ad\u03c2\u0308 \u0307\u03c3\u0308 ",
        "I\u0307\u03a3\u0308J \u0307\u03a3\u0308J I\u00ad\u03a3\u0308 \u0307\u03a3\u0308 "
    };
    /**
     * One locale per SPECIAL_DATA_ triple; a null entry selects the
     * null-locale branch of TestSpecial.
     */
    private static final Locale SPECIAL_LOCALES_[] = {
        null,
        ENGLISH_LOCALE_,
        null,
    };

    private static final String SPECIAL_DOTTED_ =
            "I \u0130 I\u0307 I\u0327\u0307 I\u0301\u0307 I\u0327\u0307\u0301";
    private static final String SPECIAL_DOTTED_LOWER_TURKISH_ =
            "\u0131 i i i\u0327 \u0131\u0301\u0307 i\u0327\u0301";
    private static final String SPECIAL_DOTTED_LOWER_GERMAN_ =
            "i i\u0307 i\u0307 i\u0327\u0307 i\u0301\u0307 i\u0327\u0307\u0301";
    private static final String SPECIAL_DOT_ABOVE_ =
            "a\u0307 \u0307 i\u0307 j\u0327\u0307 j\u0301\u0307";
    private static final String SPECIAL_DOT_ABOVE_UPPER_LITHUANIAN_ =
            "A\u0307 \u0307 I J\u0327 J\u0301\u0307";
    private static final String SPECIAL_DOT_ABOVE_UPPER_GERMAN_ =
            "A\u0307 \u0307 I\u0307 J\u0327\u0307 J\u0301\u0307";
    private static final String SPECIAL_DOT_ABOVE_UPPER_ =
            "I I\u0301 J J\u0301 \u012e \u012e\u0301 \u00cc\u00cd\u0128";
    private static final String SPECIAL_DOT_ABOVE_LOWER_LITHUANIAN_ =
            "i i\u0307\u0301 j j\u0307\u0301 \u012f \u012f\u0307\u0301 i\u0307\u0300i\u0307\u0301i\u0307\u0303";
    private static final String SPECIAL_DOT_ABOVE_LOWER_GERMAN_ =
            "i i\u0301 j j\u0301 \u012f \u012f\u0301 \u00ec\u00ed\u0129";

    // private methods -------------------------------------------------------

    /**
     * Converting the hex numbers represented between ';' to Unicode strings.
     * The first four "; "-terminated fields of the line are each decoded
     * from space separated hexadecimal code points into one string. The
     * text between the fourth field and a " #" comment, if any, is split
     * at spaces and appended as further elements (locale and condition
     * names).
     * @param str string to break up into Unicode strings
     * @return array with the four decoded strings followed by any
     *         condition tokens
     */
    private String[] getUnicodeStrings(String str)
    {
        List<String> v = new ArrayList<String>(10);
        int start = 0;
        for (int casecount = 4; casecount > 0; casecount --) {
            int end = str.indexOf("; ", start);
            String casestr = str.substring(start, end);
            StringBuffer buffer = new StringBuffer();
            int spaceoffset = 0;
            while (spaceoffset < casestr.length()) {
                int nextspace = casestr.indexOf(' ', spaceoffset);
                if (nextspace == -1) {
                    nextspace = casestr.length();
                }
                // appendCodePoint keeps supplementary code points intact,
                // a plain (char) cast would truncate them to 16 bits
                buffer.appendCodePoint(Integer.parseInt(
                                     casestr.substring(spaceoffset, nextspace),
                                                      16));
                spaceoffset = nextspace + 1;
            }
            start = end + 2;
            v.add(buffer.toString());
        }
        int comments = str.indexOf(" #", start);
        if (comments != -1 && comments != start) {
            // drop the ';' that terminates the condition list
            if (str.charAt(comments - 1) == ';') {
                comments --;
            }
            String conditions = str.substring(start, comments);
            int offset = 0;
            while (offset < conditions.length()) {
                int spaceoffset = conditions.indexOf(' ', offset);
                if (spaceoffset == -1) {
                    spaceoffset = conditions.length();
                }
                v.add(conditions.substring(offset, spaceoffset));
                offset = spaceoffset + 1;
            }
        }
        return v.toArray(new String[v.size()]);
    }
}