17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 1996-2011, International Business Machines Corporation and * 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. * 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.UCaseProps; 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A transliterator that performs locale-sensitive toLower() 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * case mapping. 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass LowercaseTransliterator extends Transliterator{ 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Package accessible ID. 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final String _ID = "Any-Lower"; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO: Add variants for tr, az, lt, default = default locale 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * System registration hook. 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static void register() { 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator.registerFactory(_ID, new Transliterator.Factory() { 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Transliterator getInstance(String ID) { 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new LowercaseTransliterator(ULocale.US); 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }); 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Transliterator.registerSpecialInverse("Lower", "Upper", true); 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private ULocale locale; 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private UCaseProps csp; 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private ReplaceableContextIterator iter; 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private StringBuilder result; 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int[] locCache; 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a transliterator. 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public LowercaseTransliterator(ULocale loc) { 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert super(_ID, null); 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert locale = loc; 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert csp=UCaseProps.INSTANCE; 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter=new ReplaceableContextIterator(); 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result = new StringBuilder(); 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert locCache = new int[1]; 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert locCache[0]=0; 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Implements {@link Transliterator#handleTransliterate}. 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected synchronized void handleTransliterate(Replaceable text, 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Position offsets, boolean isIncremental) { 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(csp==null) { 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(offsets.start >= offsets.limit) { 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setText(text); 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.setLength(0); 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c, delta; 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Walk through original string 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If there is a case change, modify corresponding position in replaceable 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setIndex(offsets.start); 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setLimit(offsets.limit); 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert iter.setContextLimits(offsets.contextStart, offsets.contextLimit); 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while((c=iter.nextCaseMapCP())>=0) { 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert c=csp.toFullLower(c, iter, result, locale, locCache); 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(iter.didReachLimit() && isIncremental) { 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the case mapping function tried to look beyond the context limit 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // wait for more input 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.start=iter.getCaseMapCPStart(); 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* decode the result */ 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(c<0) { 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* c mapped to itself, no change */ 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(c<=UCaseProps.MAX_STRING_LENGTH) { 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* replace by the mapping string */ 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=iter.replace(result.toString()); 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.setLength(0); 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* replace by single-code point mapping */ 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=iter.replace(UTF16.valueOf(c)); 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(delta!=0) { 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.limit += delta; 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.contextLimit += delta; 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert offsets.start = offsets.limit; 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // NOTE: normally this would be static, but because the results vary by locale.... 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert SourceTargetUtility sourceTargetUtility = null; 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* (non-Javadoc) 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet) 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert synchronized (this) { 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (sourceTargetUtility == null) { 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() { 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String transform(String source) { 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return UCharacter.toLowerCase(locale, source); 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }); 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sourceTargetUtility.addSourceTargetSet(this, inputFilter, sourceSet, targetSet); 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 134