/*
 * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 * Copyright (C) 2009-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package sun.util.locale;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Mutable accumulator for the pieces of a locale: the four base fields
 * (language, script, region, variant), generic extensions keyed by their
 * singleton character, and Unicode locale attributes/keywords.
 *
 * <p>Setters validate their input and throw {@link LocaleSyntaxException}
 * on ill-formed values; every setter returns {@code this} so that calls can
 * be chained. The accumulated state is read back through
 * {@link #getBaseLocale()} and {@link #getLocaleExtensions()}.
 */
public final class InternalLocaleBuilder {

    /** Map key under which the private use subtags are stored in {@link #extensions}. */
    private static final CaseInsensitiveChar PRIVATEUSE_KEY
        = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE);

    private String language = "";
    private String script = "";
    private String region = "";
    private String variant = "";

    // The three collections below are created lazily and may be null.
    private Map<CaseInsensitiveChar, String> extensions;
    private Set<CaseInsensitiveString> uattributes;
    private Map<CaseInsensitiveString, String> ukeywords;

    /**
     * Creates a builder with empty base fields and no extensions.
     */
    public InternalLocaleBuilder() {
    }

    /**
     * Sets the language field.
     *
     * @param language the language; a value for which
     *        {@code LocaleUtils.isEmpty} is true resets the field to ""
     * @return this builder
     * @throws LocaleSyntaxException if {@code language} is not empty and is
     *         rejected by {@code LanguageTag.isLanguage}
     */
    public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(language)) {
            this.language = "";
        } else {
            if (!LanguageTag.isLanguage(language)) {
                throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
            }
            this.language = language;
        }
        return this;
    }

    /**
     * Sets the script field.
     *
     * @param script the script; a value for which
     *        {@code LocaleUtils.isEmpty} is true resets the field to ""
     * @return this builder
     * @throws LocaleSyntaxException if {@code script} is not empty and is
     *         rejected by {@code LanguageTag.isScript}
     */
    public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(script)) {
            this.script = "";
        } else {
            if (!LanguageTag.isScript(script)) {
                throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
            }
            this.script = script;
        }
        return this;
    }

    /**
     * Sets the region field.
     *
     * @param region the region; a value for which
     *        {@code LocaleUtils.isEmpty} is true resets the field to ""
     * @return this builder
     * @throws LocaleSyntaxException if {@code region} is not empty and is
     *         rejected by {@code LanguageTag.isRegion}
     */
    public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(region)) {
            this.region = "";
        } else {
            if (!LanguageTag.isRegion(region)) {
                throw new LocaleSyntaxException("Ill-formed region: " + region, 0);
            }
            this.region = region;
        }
        return this;
    }

    /**
     * Sets the variant field. Language tag separators in the input are
     * replaced with the base locale separator before validation, and the
     * normalized form is what gets stored.
     *
     * @param variant the variant; a value for which
     *        {@code LocaleUtils.isEmpty} is true resets the field to ""
     * @return this builder
     * @throws LocaleSyntaxException if any variant subtag is rejected by
     *         {@code LanguageTag.isVariant}; the error index is the start
     *         of the offending subtag
     */
    public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(variant)) {
            this.variant = "";
        } else {
            // normalize separators to "_"
            String normalized = variant.replace(LanguageTag.SEP, BaseLocale.SEP);
            int errIdx = checkVariants(normalized, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
            this.variant = normalized;
        }
        return this;
    }

    /**
     * Adds a Unicode locale attribute. Attributes are held in a
     * case-insensitive set, so adding one that differs only in case from an
     * existing attribute has no effect.
     *
     * @param attribute the attribute to add
     * @return this builder
     * @throws LocaleSyntaxException if {@code attribute} is rejected by
     *         {@code UnicodeLocaleExtension.isAttribute}
     */
    public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (!UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        // Use case insensitive string to prevent duplication
        if (uattributes == null) {
            uattributes = new HashSet<>(4);
        }
        uattributes.add(new CaseInsensitiveString(attribute));
        return this;
    }

    /**
     * Removes a Unicode locale attribute, matching case-insensitively.
     * Nothing happens if the attribute is not present.
     *
     * @param attribute the attribute to remove
     * @return this builder
     * @throws LocaleSyntaxException if {@code attribute} is null or is
     *         rejected by {@code UnicodeLocaleExtension.isAttribute}
     */
    public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException {
        if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute);
        }
        if (uattributes != null) {
            uattributes.remove(new CaseInsensitiveString(attribute));
        }
        return this;
    }

    /**
     * Sets or removes a Unicode locale keyword.
     *
     * @param key the keyword key
     * @param type the keyword type; {@code null} removes the keyword, an
     *        empty string is stored as is, and any other value is validated
     *        subtag by subtag after its separators are normalized to "-"
     * @return this builder
     * @throws LocaleSyntaxException if {@code key} is rejected by
     *         {@code UnicodeLocaleExtension.isKey}, or a subtag of
     *         {@code type} is rejected by
     *         {@code UnicodeLocaleExtension.isTypeSubtag}
     */
    public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException {
        if (!UnicodeLocaleExtension.isKey(key)) {
            throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key);
        }

        CaseInsensitiveString cikey = new CaseInsensitiveString(key);
        if (type == null) {
            if (ukeywords != null) {
                // null type is used to remove the key
                ukeywords.remove(cikey);
            }
        } else {
            String normalizedType = type;
            if (type.length() != 0) {
                // normalize separator to "-"
                normalizedType = type.replace(BaseLocale.SEP, LanguageTag.SEP);
                // validate
                StringTokenIterator itr = new StringTokenIterator(normalizedType, LanguageTag.SEP);
                while (!itr.isDone()) {
                    String s = itr.current();
                    if (!UnicodeLocaleExtension.isTypeSubtag(s)) {
                        throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: "
                                                        + type,
                                                        itr.currentStart());
                    }
                    itr.next();
                }
            }
            if (ukeywords == null) {
                ukeywords = new HashMap<>(4);
            }
            // Store the form that was actually validated, so that a type
            // passed with "_" separators is kept with "-" separators, the
            // same way setExtension stores its normalized value.
            ukeywords.put(cikey, normalizedType);
        }
        return this;
    }

    /**
     * Sets or removes the extension (or private use value) identified by
     * the given singleton character.
     *
     * <p>When {@code value} is empty, the extension is removed; for the
     * Unicode locale extension singleton this clears all Unicode locale
     * attributes and keywords. Otherwise the value's separators are
     * normalized to "-", each subtag is validated, and the value replaces
     * whatever was stored for that singleton.
     *
     * @param singleton the extension singleton or private use prefix character
     * @param value the extension value; a value for which
     *        {@code LocaleUtils.isEmpty} is true removes the extension
     * @return this builder
     * @throws LocaleSyntaxException if {@code singleton} is neither a
     *         private use prefix nor an extension singleton, or if a subtag
     *         of {@code value} is ill-formed
     */
    public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
        // validate key
        boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton);
        if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) {
            throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
        }

        boolean remove = LocaleUtils.isEmpty(value);
        CaseInsensitiveChar key = new CaseInsensitiveChar(singleton);

        if (remove) {
            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                // clear entire Unicode locale extension
                if (uattributes != null) {
                    uattributes.clear();
                }
                if (ukeywords != null) {
                    ukeywords.clear();
                }
            } else {
                if (extensions != null) {
                    extensions.remove(key);
                }
            }
        } else {
            // validate value
            String val = value.replace(BaseLocale.SEP, LanguageTag.SEP);
            StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP);
            while (!itr.isDone()) {
                String s = itr.current();
                boolean validSubtag;
                if (isBcpPrivateuse) {
                    validSubtag = LanguageTag.isPrivateuseSubtag(s);
                } else {
                    validSubtag = LanguageTag.isExtensionSubtag(s);
                }
                if (!validSubtag) {
                    throw new LocaleSyntaxException("Ill-formed extension value: " + s,
                                                    itr.currentStart());
                }
                itr.next();
            }

            if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                setUnicodeLocaleExtension(val);
            } else {
                if (extensions == null) {
                    extensions = new HashMap<>(4);
                }
                extensions.put(key, val);
            }
        }
        return this;
    }

    /**
     * Sets extension and private use subtags from a single string
     * representation, replacing all extensions currently held.
     *
     * <p>The string is a sequence of extensions (a singleton followed by at
     * least one extension subtag) optionally followed by a private use
     * sequence (the private use prefix followed by at least one subtag).
     * Separators are normalized to "-" before parsing.
     *
     * @param subtags the extension subtags; a value for which
     *        {@code LocaleUtils.isEmpty} is true just clears all extensions
     * @return this builder
     * @throws LocaleSyntaxException if a singleton or the private use prefix
     *         is not followed by any valid subtag, or if unparsed input
     *         remains; the error index is the start of the offending part
     */
    public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException {
        if (LocaleUtils.isEmpty(subtags)) {
            clearExtensions();
            return this;
        }
        subtags = subtags.replace(BaseLocale.SEP, LanguageTag.SEP);
        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        List<String> bcpExtensions = null;
        String privateuse = null;

        // End offset of the last subtag accepted so far.
        int parsed = 0;
        int start;

        // Make a list of extension subtags
        while (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isExtensionSingleton(s)) {
                start = itr.currentStart();
                String singleton = s;
                StringBuilder sb = new StringBuilder(singleton);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (LanguageTag.isExtensionSubtag(s)) {
                        sb.append(LanguageTag.SEP).append(s);
                        parsed = itr.currentEnd();
                    } else {
                        break;
                    }
                    itr.next();
                }

                // "<=" rather than "<": a singleton at offset 0 with no
                // subtags leaves parsed == start == 0. Letting it through
                // would hand a bare singleton to setExtensions(List, String),
                // whose substring(2) then fails with an index exception.
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'",
                                                    start);
                }

                if (bcpExtensions == null) {
                    bcpExtensions = new ArrayList<>(4);
                }
                bcpExtensions.add(sb.toString());
            } else {
                break;
            }
        }
        if (!itr.isDone()) {
            String s = itr.current();
            if (LanguageTag.isPrivateusePrefix(s)) {
                start = itr.currentStart();
                StringBuilder sb = new StringBuilder(s);

                itr.next();
                while (!itr.isDone()) {
                    s = itr.current();
                    if (!LanguageTag.isPrivateuseSubtag(s)) {
                        break;
                    }
                    sb.append(LanguageTag.SEP).append(s);
                    parsed = itr.currentEnd();

                    itr.next();
                }
                if (parsed <= start) {
                    throw new LocaleSyntaxException("Incomplete privateuse:"
                                                    + subtags.substring(start),
                                                    start);
                } else {
                    privateuse = sb.toString();
                }
            }
        }

        if (!itr.isDone()) {
            throw new LocaleSyntaxException("Ill-formed extension subtags:"
                                            + subtags.substring(itr.currentStart()),
                                            itr.currentStart());
        }

        return setExtensions(bcpExtensions, privateuse);
    }

    /*
     * Set a list of BCP47 extensions and private use subtags.
     * BCP47 extensions are already validated and well-formed, but may contain duplicates.
     * Each entry must include its singleton and separator (e.g. "a-abc-def"),
     * because the value is taken from offset 2 onwards.
     */
    private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) {
        clearExtensions();

        if (!LocaleUtils.isEmpty(bcpExtensions)) {
            Set<CaseInsensitiveChar> done = new HashSet<>(bcpExtensions.size());
            for (String bcpExt : bcpExtensions) {
                CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt);
                // ignore duplicates
                if (!done.contains(key)) {
                    // each extension string contains singleton, e.g. "a-abc-def"
                    if (UnicodeLocaleExtension.isSingletonChar(key.value())) {
                        setUnicodeLocaleExtension(bcpExt.substring(2));
                    } else {
                        if (extensions == null) {
                            extensions = new HashMap<>(4);
                        }
                        extensions.put(key, bcpExt.substring(2));
                    }
                }
                done.add(key);
            }
        }
        if (privateuse != null && privateuse.length() > 0) {
            // privateuse string contains prefix, e.g. "x-abc-def"
            if (extensions == null) {
                extensions = new HashMap<>(1);
            }
            extensions.put(new CaseInsensitiveChar(privateuse), privateuse.substring(2));
        }

        return this;
    }

    /**
     * Resets this builder's state from the given language tag.
     *
     * <p>The first extlang, when present, is used as the language;
     * otherwise the tag's language is used unless it equals
     * {@code LanguageTag.UNDETERMINED}. Variants are joined with the base
     * locale separator. No validation is performed here.
     *
     * @param langtag the source language tag
     * @return this builder
     */
    public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) {
        clear();
        if (!langtag.getExtlangs().isEmpty()) {
            language = langtag.getExtlangs().get(0);
        } else {
            String lang = langtag.getLanguage();
            if (!lang.equals(LanguageTag.UNDETERMINED)) {
                language = lang;
            }
        }
        script = langtag.getScript();
        region = langtag.getRegion();

        List<String> bcpVariants = langtag.getVariants();
        if (!bcpVariants.isEmpty()) {
            StringBuilder joined = new StringBuilder(bcpVariants.get(0));
            int size = bcpVariants.size();
            for (int i = 1; i < size; i++) {
                joined.append(BaseLocale.SEP).append(bcpVariants.get(i));
            }
            variant = joined.toString();
        }

        setExtensions(langtag.getExtensions(), langtag.getPrivateuse());

        return this;
    }

    /**
     * Replaces this builder's state with the given base locale and
     * extensions. All base fields are validated before any internal state
     * is modified, so a failure leaves the builder unchanged.
     *
     * <p>Three legacy locales get special treatment: for ja_JP_JP and
     * th_TH_TH the variant is dropped, and for no_NO_NY the variant is
     * dropped and the language becomes "nn".
     *
     * @param base the base locale to copy the fields from
     * @param localeExtensions the extensions to copy; {@code null} is
     *        treated as no extensions when copying
     * @return this builder
     * @throws LocaleSyntaxException if the language, script, region, or
     *         variant of {@code base} is ill-formed
     */
    public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions localeExtensions) throws LocaleSyntaxException {
        String language = base.getLanguage();
        String script = base.getScript();
        String region = base.getRegion();
        String variant = base.getVariant();

        // Special backward compatibility support

        // Exception 1 - ja_JP_JP
        if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) {
            // When locale ja_JP_JP is created, ca-japanese is always there.
            // The builder ignores the variant "JP"
            assert("japanese".equals(localeExtensions.getUnicodeLocaleType("ca")));
            variant = "";
        }
        // Exception 2 - th_TH_TH
        else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) {
            // When locale th_TH_TH is created, nu-thai is always there.
            // The builder ignores the variant "TH"
            assert("thai".equals(localeExtensions.getUnicodeLocaleType("nu")));
            variant = "";
        }
        // Exception 3 - no_NO_NY
        else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) {
            // no_NO_NY is a valid locale and used by Java 6 or older versions.
            // The builder ignores the variant "NY" and changes the language to "nn".
            language = "nn";
            variant = "";
        }

        // Validate base locale fields before updating internal state.
        // LocaleExtensions always store validated/canonicalized values,
        // so no checks are necessary.
        if (language.length() > 0 && !LanguageTag.isLanguage(language)) {
            throw new LocaleSyntaxException("Ill-formed language: " + language);
        }

        if (script.length() > 0 && !LanguageTag.isScript(script)) {
            throw new LocaleSyntaxException("Ill-formed script: " + script);
        }

        if (region.length() > 0 && !LanguageTag.isRegion(region)) {
            throw new LocaleSyntaxException("Ill-formed region: " + region);
        }

        if (variant.length() > 0) {
            // normalize separators to "_"
            variant = variant.replace(LanguageTag.SEP, BaseLocale.SEP);
            int errIdx = checkVariants(variant, BaseLocale.SEP);
            if (errIdx != -1) {
                throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx);
            }
        }

        // The input locale is validated at this point.
        // Now, updating builder's internal fields.
        this.language = language;
        this.script = script;
        this.region = region;
        this.variant = variant;
        clearExtensions();

        Set<Character> extKeys = (localeExtensions == null) ? null : localeExtensions.getKeys();
        if (extKeys != null) {
            // map localeExtensions back to builder's internal format
            for (Character key : extKeys) {
                Extension e = localeExtensions.getExtension(key);
                if (e instanceof UnicodeLocaleExtension) {
                    UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e;
                    for (String uatr : ue.getUnicodeLocaleAttributes()) {
                        if (uattributes == null) {
                            uattributes = new HashSet<>(4);
                        }
                        uattributes.add(new CaseInsensitiveString(uatr));
                    }
                    for (String ukey : ue.getUnicodeLocaleKeys()) {
                        if (ukeywords == null) {
                            ukeywords = new HashMap<>(4);
                        }
                        ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey));
                    }
                } else {
                    if (extensions == null) {
                        extensions = new HashMap<>(4);
                    }
                    extensions.put(new CaseInsensitiveChar(key), e.getValue());
                }
            }
        }
        return this;
    }

    /**
     * Resets all base fields to "" and removes all extensions.
     *
     * @return this builder
     */
    public InternalLocaleBuilder clear() {
        language = "";
        script = "";
        region = "";
        variant = "";
        clearExtensions();
        return this;
    }

    /**
     * Removes all extensions, Unicode locale attributes, and Unicode locale
     * keywords. The base fields are left untouched.
     *
     * @return this builder
     */
    public InternalLocaleBuilder clearExtensions() {
        if (extensions != null) {
            extensions.clear();
        }
        if (uattributes != null) {
            uattributes.clear();
        }
        if (ukeywords != null) {
            ukeywords.clear();
        }
        return this;
    }

    /**
     * Returns the base locale for the current state.
     *
     * <p>If the private use value contains the subtag
     * {@code LanguageTag.PRIVUSE_VARIANT_PREFIX} followed by at least one
     * more subtag, everything after that prefix is appended to the variant
     * (with separators converted to the base locale separator).
     *
     * @return the result of {@code BaseLocale.getInstance} for the
     *         language, script, region, and effective variant
     */
    public BaseLocale getBaseLocale() {
        String language = this.language;
        String script = this.script;
        String region = this.region;
        String variant = this.variant;

        // Special private use subtag sequence identified by "lvariant" will be
        // interpreted as Java variant.
        if (extensions != null) {
            String privuse = extensions.get(PRIVATEUSE_KEY);
            if (privuse != null) {
                StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP);
                boolean sawPrefix = false;
                int privVarStart = -1;
                while (!itr.isDone()) {
                    if (sawPrefix) {
                        // first subtag after the prefix: the variant part starts here
                        privVarStart = itr.currentStart();
                        break;
                    }
                    if (LocaleUtils.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                        sawPrefix = true;
                    }
                    itr.next();
                }
                if (privVarStart != -1) {
                    StringBuilder sb = new StringBuilder(variant);
                    if (sb.length() != 0) {
                        sb.append(BaseLocale.SEP);
                    }
                    sb.append(privuse.substring(privVarStart).replace(LanguageTag.SEP,
                                                                      BaseLocale.SEP));
                    variant = sb.toString();
                }
            }
        }

        return BaseLocale.getInstance(language, script, region, variant);
    }

    /**
     * Returns the extensions for the current state.
     *
     * @return a new {@code LocaleExtensions}, or {@code null} when no
     *         extension, attribute, or keyword is held or the constructed
     *         instance reports itself empty
     */
    public LocaleExtensions getLocaleExtensions() {
        if (LocaleUtils.isEmpty(extensions) && LocaleUtils.isEmpty(uattributes)
            && LocaleUtils.isEmpty(ukeywords)) {
            return null;
        }

        LocaleExtensions lext = new LocaleExtensions(extensions, uattributes, ukeywords);
        return lext.isEmpty() ? null : lext;
    }

    /*
     * Remove special private use subtag sequence identified by "lvariant"
     * and return the rest. Only used by LocaleExtensions.
     * Returns null when the sequence starts at offset 0, i.e. nothing is left.
     */
    static String removePrivateuseVariant(String privuseVal) {
        StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP);

        // Note: privateuse value "abc-lvariant" is unchanged
        // because no subtags after "lvariant".

        int prefixStart = -1;
        boolean sawPrivuseVar = false;
        while (!itr.isDone()) {
            if (prefixStart != -1) {
                // a subtag follows the prefix, so there is a variant to strip
                sawPrivuseVar = true;
                break;
            }
            if (LocaleUtils.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) {
                prefixStart = itr.currentStart();
            }
            itr.next();
        }
        if (!sawPrivuseVar) {
            return privuseVal;
        }

        assert(prefixStart == 0 || prefixStart > 1);
        // prefixStart - 1 also drops the separator that precedes the prefix
        return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart - 1);
    }

    /*
     * Check if the given variant subtags separated by the given
     * separator(s) are valid.
     * Returns the start offset of the first invalid subtag, or -1 if all are valid.
     */
    private static int checkVariants(String variants, String sep) {
        StringTokenIterator itr = new StringTokenIterator(variants, sep);
        while (!itr.isDone()) {
            String s = itr.current();
            if (!LanguageTag.isVariant(s)) {
                return itr.currentStart();
            }
            itr.next();
        }
        return -1;
    }

    /*
     * Private method parsing Unicode Locale Extension subtags.
     * Existing attributes/keywords are wiped out first.
     * Duplicated attributes/keywords will be ignored (the first occurrence wins).
     * The input must be valid extension subtags (excluding singleton).
     */
    private void setUnicodeLocaleExtension(String subtags) {
        // wipe out existing attributes/keywords
        if (uattributes != null) {
            uattributes.clear();
        }
        if (ukeywords != null) {
            ukeywords.clear();
        }

        StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP);

        // parse attributes
        while (!itr.isDone()) {
            if (!UnicodeLocaleExtension.isAttribute(itr.current())) {
                break;
            }
            if (uattributes == null) {
                uattributes = new HashSet<>(4);
            }
            uattributes.add(new CaseInsensitiveString(itr.current()));
            itr.next();
        }

        // parse keywords
        // key is the keyword currently being collected, or null when there
        // is none (nothing seen yet, or the current one is a duplicate);
        // typeStart/typeEnd delimit its type within subtags.
        CaseInsensitiveString key = null;
        String type;
        int typeStart = -1;
        int typeEnd = -1;
        while (!itr.isDone()) {
            if (key != null) {
                if (UnicodeLocaleExtension.isKey(itr.current())) {
                    // next keyword - emit previous one
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (ukeywords == null) {
                        ukeywords = new HashMap<>(4);
                    }
                    ukeywords.put(key, type);

                    // reset keyword info
                    CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
                    key = ukeywords.containsKey(tmpKey) ? null : tmpKey;
                    typeStart = typeEnd = -1;
                } else {
                    if (typeStart == -1) {
                        typeStart = itr.currentStart();
                    }
                    typeEnd = itr.currentEnd();
                }
            } else if (UnicodeLocaleExtension.isKey(itr.current())) {
                // 1. first keyword or
                // 2. next keyword, but previous one was duplicate
                key = new CaseInsensitiveString(itr.current());
                if (ukeywords != null && ukeywords.containsKey(key)) {
                    // duplicate
                    key = null;
                }
            }

            if (!itr.hasNext()) {
                if (key != null) {
                    // last keyword
                    assert(typeStart == -1 || typeEnd != -1);
                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
                    if (ukeywords == null) {
                        ukeywords = new HashMap<>(4);
                    }
                    ukeywords.put(key, type);
                }
                break;
            }

            itr.next();
        }
    }

    /**
     * String wrapper whose {@code equals} and {@code hashCode} are based on
     * the lower-cased form produced by {@code LocaleUtils.toLowerString},
     * while {@link #value()} returns the string as originally given.
     */
    static final class CaseInsensitiveString {
        private final String str, lowerStr;

        CaseInsensitiveString(String s) {
            str = s;
            lowerStr = LocaleUtils.toLowerString(s);
        }

        /** Returns the wrapped string with its original case. */
        public String value() {
            return str;
        }

        @Override
        public int hashCode() {
            return lowerStr.hashCode();
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveString)) {
                return false;
            }
            return lowerStr.equals(((CaseInsensitiveString)obj).lowerStr);
        }
    }

    /**
     * Char wrapper whose {@code equals} and {@code hashCode} are based on
     * the lower-cased form produced by {@code LocaleUtils.toLower}, while
     * {@link #value()} returns the char as originally given.
     */
    static final class CaseInsensitiveChar {
        private final char ch, lowerCh;

        /**
         * Constructs a CaseInsensitiveChar with the first char of the
         * given s.
         */
        private CaseInsensitiveChar(String s) {
            this(s.charAt(0));
        }

        CaseInsensitiveChar(char c) {
            ch = c;
            lowerCh = LocaleUtils.toLower(ch);
        }

        /** Returns the wrapped char with its original case. */
        public char value() {
            return ch;
        }

        @Override
        public int hashCode() {
            return lowerCh;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof CaseInsensitiveChar)) {
                return false;
            }
            return lowerCh == ((CaseInsensitiveChar)obj).lowerCh;
        }
    }
}