17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 2009-2014, International Business Machines 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others. All Rights Reserved. 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException; 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ICUUncheckedIOException; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Normalization filtered by a UnicodeSet. 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Normalizes portions of the text contained in the filter set and leaves 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * portions not contained in the filter set unchanged. 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Filtering is done via UnicodeSet.span(..., UnicodeSet.SpanCondition.SIMPLE). 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Not-in-the-filter text is treated as "is normalized" and "quick check yes". 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This class implements all of (and only) the Normalizer2 API. 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * An instance of this class is unmodifiable/immutable. 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Markus W. Scherer 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class FilteredNormalizer2 extends Normalizer2 { 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a filtered normalizer wrapping any Normalizer2 instance 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and a filter set. 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Both are aliased and must not be modified or deleted while this object 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * is used. 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The filter set should be frozen; otherwise the performance will suffer greatly. 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param n2 wrapped Normalizer2 instance 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param filterSet UnicodeSet which determines the characters to be normalized 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public FilteredNormalizer2(Normalizer2 n2, UnicodeSet filterSet) { 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert norm2=n2; 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert set=filterSet; 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public StringBuilder normalize(CharSequence src, StringBuilder dest) { 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(dest==src) { 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException(); 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dest.setLength(0); 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE); 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return dest; 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.6 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Appendable normalize(CharSequence src, Appendable dest) { 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(dest==src) { 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException(); 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return normalize(src, dest, UnicodeSet.SpanCondition.SIMPLE); 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public StringBuilder normalizeSecondAndAppend( 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder first, CharSequence second) { 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return normalizeSecondAndAppend(first, second, true); 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public StringBuilder append(StringBuilder first, CharSequence second) { 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return normalizeSecondAndAppend(first, second, false); 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.6 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String getDecomposition(int c) { 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return set.contains(c) ? norm2.getDecomposition(c) : null; 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 49 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String getRawDecomposition(int c) { 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return set.contains(c) ? norm2.getRawDecomposition(c) : null; 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 49 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int composePair(int a, int b) { 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : -1; 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 49 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getCombiningClass(int c) { 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return set.contains(c) ? norm2.getCombiningClass(c) : 0; 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean isNormalized(CharSequence s) { 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE; 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int prevSpanLimit=0; prevSpanLimit<s.length();) { 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int spanLimit=set.span(s, prevSpanLimit, spanCondition); 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) { 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCondition=UnicodeSet.SpanCondition.SIMPLE; 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(!norm2.isNormalized(s.subSequence(prevSpanLimit, spanLimit))) { 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED; 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert prevSpanLimit=spanLimit; 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return true; 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Normalizer.QuickCheckResult quickCheck(CharSequence s) { 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Normalizer.QuickCheckResult result=Normalizer.YES; 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE; 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int prevSpanLimit=0; prevSpanLimit<s.length();) { 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int spanLimit=set.span(s, prevSpanLimit, spanCondition); 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) { 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCondition=UnicodeSet.SpanCondition.SIMPLE; 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Normalizer.QuickCheckResult qcResult= 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert norm2.quickCheck(s.subSequence(prevSpanLimit, spanLimit)); 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(qcResult==Normalizer.NO) { 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return qcResult; 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(qcResult==Normalizer.MAYBE) { 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result=qcResult; 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED; 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert prevSpanLimit=spanLimit; 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int spanQuickCheckYes(CharSequence s) { 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet.SpanCondition spanCondition=UnicodeSet.SpanCondition.SIMPLE; 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int prevSpanLimit=0; prevSpanLimit<s.length();) { 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int spanLimit=set.span(s, prevSpanLimit, spanCondition); 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) { 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCondition=UnicodeSet.SpanCondition.SIMPLE; 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int yesLimit= 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert prevSpanLimit+ 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert norm2.spanQuickCheckYes(s.subSequence(prevSpanLimit, spanLimit)); 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(yesLimit<spanLimit) { 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return yesLimit; 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED; 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert prevSpanLimit=spanLimit; 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return s.length(); 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean hasBoundaryBefore(int c) { 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return !set.contains(c) || norm2.hasBoundaryBefore(c); 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean hasBoundaryAfter(int c) { 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return !set.contains(c) || norm2.hasBoundaryAfter(c); 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * {@inheritDoc} 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.4 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean isInert(int c) { 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return !set.contains(c) || norm2.isInert(c); 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Internal: No argument checking, and appends to dest. 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Pass as input spanCondition the one that is likely to yield a non-zero 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // span length at the start of src. 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // For set=[:age=3.2:], since almost all common characters were in Unicode 3.2, 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // UnicodeSet.SpanCondition.SIMPLE should be passed in for the start of src 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and UnicodeSet.SpanCondition.NOT_CONTAINED should be passed in if we continue after 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // an in-filter prefix. 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Appendable normalize(CharSequence src, Appendable dest, 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet.SpanCondition spanCondition) { 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Don't throw away destination buffer between iterations. 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder tempDest=new StringBuilder(); 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int prevSpanLimit=0; prevSpanLimit<src.length();) { 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int spanLimit=set.span(src, prevSpanLimit, spanCondition); 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int spanLength=spanLimit-prevSpanLimit; 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) { 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(spanLength!=0) { 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dest.append(src, prevSpanLimit, spanLimit); 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCondition=UnicodeSet.SpanCondition.SIMPLE; 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(spanLength!=0) { 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Not norm2.normalizeSecondAndAppend() because we do not want 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // to modify the non-filter part of dest. 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert dest.append(norm2.normalize(src.subSequence(prevSpanLimit, spanLimit), tempDest)); 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED; 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert prevSpanLimit=spanLimit; 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(IOException e) { 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new ICUUncheckedIOException(e); 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return dest; 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second, 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean doNormalize) { 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(first==second) { 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException(); 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(first.length()==0) { 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(doNormalize) { 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return normalize(second, first); 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return first.append(second); 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // merge the in-filter suffix of the first string with the in-filter prefix of the second 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int prefixLimit=set.span(second, 0, UnicodeSet.SpanCondition.SIMPLE); 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(prefixLimit!=0) { 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharSequence prefix=second.subSequence(0, prefixLimit); 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int suffixStart=set.spanBack(first, 0x7fffffff, UnicodeSet.SpanCondition.SIMPLE); 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(suffixStart==0) { 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(doNormalize) { 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert norm2.normalizeSecondAndAppend(first, prefix); 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert norm2.append(first, prefix); 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder middle=new StringBuilder( 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert first.subSequence(suffixStart, first.length())); 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(doNormalize) { 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert norm2.normalizeSecondAndAppend(middle, prefix); 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert norm2.append(middle, prefix); 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert first.delete(suffixStart, 0x7fffffff).append(middle); 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(prefixLimit<second.length()) { 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharSequence rest=second.subSequence(prefixLimit, second.length()); 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(doNormalize) { 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert normalize(rest, first, UnicodeSet.SpanCondition.NOT_CONTAINED); 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert first.append(rest); 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return first; 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Normalizer2 norm2; 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private UnicodeSet set; 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}; 303