12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others. 22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 1996-2011, International Business Machines Corporation and * 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. * 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.UCaseProps; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A transliterator that performs locale-sensitive toLower() 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * case mapping. 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass LowercaseTransliterator extends Transliterator{ 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Package accessible ID. 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final String _ID = "Any-Lower"; 252d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 262d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert // TODO: Add variants for tr/az, lt, default = default locale: ICU ticket #12720 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * System registration hook. 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static void register() { 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator.registerFactory(_ID, new Transliterator.Factory() { 332d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Transliterator getInstance(String ID) { 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new LowercaseTransliterator(ULocale.US); 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }); 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator.registerSpecialInverse("Lower", "Upper", true); 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 422d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert private final ULocale locale; 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 442d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert private final UCaseProps csp; 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private ReplaceableContextIterator iter; 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private StringBuilder result; 4763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer private int caseLocale; 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a transliterator. 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public LowercaseTransliterator(ULocale loc) { 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert super(_ID, null); 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert locale = loc; 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert csp=UCaseProps.INSTANCE; 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter=new ReplaceableContextIterator(); 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result = new StringBuilder(); 5963cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer caseLocale = UCaseProps.getCaseLocale(locale); 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Implements {@link Transliterator#handleTransliterate}. 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 652d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected synchronized void handleTransliterate(Replaceable text, 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position offsets, boolean isIncremental) { 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(csp==null) { 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(offsets.start >= offsets.limit) { 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 742d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert } 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setText(text); 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.setLength(0); 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c, delta; 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Walk through original string 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If there is a case change, modify corresponding position in replaceable 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setIndex(offsets.start); 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setLimit(offsets.limit); 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setContextLimits(offsets.contextStart, offsets.contextLimit); 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while((c=iter.nextCaseMapCP())>=0) { 8763cafec8b8cb135e7c06ef6b9fc8c128ed55b140Markus Scherer c=csp.toFullLower(c, iter, result, caseLocale); 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(iter.didReachLimit() && isIncremental) { 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the case mapping function tried to look beyond the context limit 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // wait for more input 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.start=iter.getCaseMapCPStart(); 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* decode the result */ 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(c<0) { 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* c mapped to itself, no change */ 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* replace by the mapping string */ 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=iter.replace(result.toString()); 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.setLength(0); 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* replace by single-code point mapping */ 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=iter.replace(UTF16.valueOf(c)); 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(delta!=0) { 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.limit += delta; 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.contextLimit += delta; 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.start = offsets.limit; 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1162d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // NOTE: normally this would be static, but because the results vary by locale.... 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert SourceTargetUtility sourceTargetUtility = null; 1192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* (non-Javadoc) 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet) 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert synchronized (this) { 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (sourceTargetUtility == null) { 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() { 1282d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert @Override 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String transform(String source) { 1302d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert return UCharacter.toLowerCase(locale, source); 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }); 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sourceTargetUtility.addSourceTargetSet(this, inputFilter, sourceSet, targetSet); 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 138