1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/* 4 ******************************************************************************* 5 * Copyright (C) 2010-2011, Google, International Business Machines * 6 * Corporation and others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9package com.ibm.icu.text; 10 11import java.util.HashSet; 12import java.util.Set; 13 14import com.ibm.icu.lang.CharSequences; 15 16/** 17 * Simple internal utility class for helping with getSource/TargetSet 18 */ 19class SourceTargetUtility { 20 final Transform<String, String> transform; 21 final UnicodeSet sourceCache; 22 final Set<String> sourceStrings; 23 static final UnicodeSet NON_STARTERS = new UnicodeSet("[:^ccc=0:]").freeze(); 24 static Normalizer2 NFC = Normalizer2.getNFCInstance(); 25 //static final UnicodeSet TRAILING_COMBINING = new UnicodeSet(); 26 27 public SourceTargetUtility(Transform<String, String> transform) { 28 this(transform, null); 29 } 30 31 public SourceTargetUtility(Transform<String, String> transform, Normalizer2 normalizer) { 32 this.transform = transform; 33 if (normalizer != null) { 34// synchronized (SourceTargetUtility.class) { 35// if (NFC == null) { 36// NFC = Normalizer2.getInstance(null, "nfc", Mode.COMPOSE); 37// for (int i = 0; i <= 0x10FFFF; ++i) { 38// String d = NFC.getDecomposition(i); 39// if (d == null) { 40// continue; 41// } 42// String s = NFC.normalize(d); 43// if (!CharSequences.equals(i, s)) { 44// continue; 45// } 46// // composes 47// boolean first = false; 48// for (int trailing : CharSequences.codePoints(d)) { 49// if (first) { 50// first = false; 51// } else { 52// TRAILING_COMBINING.add(trailing); 53// } 54// } 55// } 56// } 57// } 58 sourceCache = new UnicodeSet("[:^ccc=0:]"); 59 } else { 60 sourceCache = new UnicodeSet(); 61 } 62 sourceStrings = new HashSet<String>(); 63 for (int i = 0; i <= 0x10FFFF; ++i) { 64 String s = transform.transform(UTF16.valueOf(i)); 65 boolean added = false; 66 if (!CharSequences.equals(i, s)) { 67 sourceCache.add(i); 68 added = true; 69 } 70 if (normalizer == null) { 71 continue; 72 } 73 String d = NFC.getDecomposition(i); 74 if (d == null) { 75 continue; 76 } 77 s = transform.transform(d); 78 if (!d.equals(s)) { 79 sourceStrings.add(d); 80 } 81 if (added) { 82 continue; 83 } 84 if (!normalizer.isInert(i)) { 85 sourceCache.add(i); 86 continue; 87 } 88 // see if any of the non-starters change s; if so, add i 89// for (String ns : TRAILING_COMBINING) { 90// String s2 = transform.transform(s + ns); 91// if (!s2.startsWith(s)) { 92// sourceCache.add(i); 93// break; 94// } 95// } 96 97 // int endOfFirst = CharSequences.onCharacterBoundary(d, 1) ? 1 : 2; 98 // if (endOfFirst >= d.length()) { 99 // continue; 100 // } 101 // // now add all initial substrings 102 // for (int j = 1; j < d.length(); ++j) { 103 // if (!CharSequences.onCharacterBoundary(d, j)) { 104 // continue; 105 // } 106 // String dd = d.substring(0,j); 107 // s = transform.transform(dd); 108 // if (!dd.equals(s)) { 109 // sourceStrings.add(dd); 110 // } 111 // } 112 } 113 sourceCache.freeze(); 114 } 115 116 public void addSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet, 117 UnicodeSet targetSet) { 118 UnicodeSet myFilter = transliterator.getFilterAsUnicodeSet(inputFilter); 119 UnicodeSet affectedCharacters = new UnicodeSet(sourceCache).retainAll(myFilter); 120 sourceSet.addAll(affectedCharacters); 121 for (String s : affectedCharacters) { 122 targetSet.addAll(transform.transform(s)); 123 } 124 for (String s : sourceStrings) { 125 if (myFilter.containsAll(s)) { 126 String t = transform.transform(s); 127 if (!s.equals(t)) { 128 targetSet.addAll(t); 129 sourceSet.addAll(s); 130 } 131 } 132 } 133 } 134} 135