12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others. 22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 1996-2011, International Business Machines Corporation and 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.UCaseProps; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A transliterator that converts all letters (as defined by 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <code>UCharacter.isLetter()</code>) to lower case, except for those 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * letters preceded by non-letters. The latter are converted to title 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * case using <code>UCharacter.toTitleCase()</code>. 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Alan Liu 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass TitlecaseTransliterator extends Transliterator { 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final String _ID = "Any-Title"; 242d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert // TODO: Add variants for tr/az, lt, default = default locale: ICU ticket #12720 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * System registration hook. 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static void register() { 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator.registerFactory(_ID, new Transliterator.Factory() { 312d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Transliterator getInstance(String ID) { 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new TitlecaseTransliterator(ULocale.US); 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }); 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert registerSpecialInverse("Title", "Lower", false); 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 402d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert private final ULocale locale; 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert private final UCaseProps csp; 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private ReplaceableContextIterator iter; 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private StringBuilder result; 4563cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int caseLocale; 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a transliterator. 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public TitlecaseTransliterator(ULocale loc) { 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert super(_ID, null); 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert locale = loc; 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Need to look back 2 characters in the case of "can't" 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert setMaximumContextLength(2); 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert csp=UCaseProps.INSTANCE; 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter=new ReplaceableContextIterator(); 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result = new StringBuilder(); 5863cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer caseLocale = UCaseProps.getCaseLocale(locale); 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 602d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Implements {@link Transliterator#handleTransliterate}. 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 642d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected synchronized void handleTransliterate(Replaceable text, 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position offsets, boolean isIncremental) { 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO reimplement, see ustrcase.c 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // using a real word break iterator 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // instead of just looking for a transition between cased and uncased characters 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap) 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // needs to take isIncremental into account because case mappings are context-sensitive 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // also detect when lowercasing function did not finish because of context 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (offsets.start >= offsets.limit) { 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // case type: >0 cased (UCaseProps.LOWER etc.) ==0 uncased <0 case-ignorable 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int type; 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Our mode; we are either converting letter toTitle or 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // toLower. 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean doTitle = true; 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Determine if there is a preceding context of cased case-ignorable*, 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // in which case we want to start in toLower mode. If the 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // prior context is anything else (including empty) then start 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // in toTitle mode. 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c, start; 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF16.getCharCount(c)) { 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert c = text.char32At(start); 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert type=csp.getTypeOrIgnorable(c); 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(type>0) { // cased 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert doTitle=false; 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(type==0) { // uncased but not ignorable 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // else (type<0) case-ignorable: continue 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Convert things after a cased character toLower; things 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // after a uncased, non-case-ignorable character toTitle. Case-ignorable 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // characters are copied directly and do not change the mode. 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setText(text); 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setIndex(offsets.start); 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setLimit(offsets.limit); 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setContextLimits(offsets.contextStart, offsets.contextLimit); 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.setLength(0); 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Walk through original string 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If there is a case change, modify corresponding position in replaceable 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int delta; 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while((c=iter.nextCaseMapCP())>=0) { 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert type=csp.getTypeOrIgnorable(c); 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(type>=0) { // not case-ignorable 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(doTitle) { 12163cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer c=csp.toFullTitle(c, iter, result, caseLocale); 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 12363cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer c=csp.toFullLower(c, iter, result, caseLocale); 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert doTitle = type==0; // doTitle=isUncased 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(iter.didReachLimit() && isIncremental) { 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the case mapping function tried to look beyond the context limit 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // wait for more input 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.start=iter.getCaseMapCPStart(); 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* decode the result */ 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(c<0) { 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* c mapped to itself, no change */ 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* replace by the mapping string */ 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=iter.replace(result.toString()); 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.setLength(0); 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* replace by single-code point mapping */ 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=iter.replace(UTF16.valueOf(c)); 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(delta!=0) { 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.limit += delta; 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.contextLimit += delta; 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.start = offsets.limit; 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1552d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // NOTE: normally this would be static, but because the results vary by locale.... 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert SourceTargetUtility sourceTargetUtility = null; 1582d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* (non-Javadoc) 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet) 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert synchronized (this) { 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (sourceTargetUtility == null) { 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() { 1672d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String transform(String source) { 1692d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert return UCharacter.toTitleCase(locale, source, null); 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }); 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sourceTargetUtility.addSourceTargetSet(this, inputFilter, sourceSet, targetSet); 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 177