/*
 *******************************************************************************
 * Copyright (C) 1996-2012, Google, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.dev.util;

import java.util.Comparator;
import java.util.HashMap;
import java.util.List;

import com.ibm.icu.dev.util.UnicodeProperty.PatternMatcher;
import com.ibm.icu.impl.UnicodeRegex;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

/**
 * Allows for overriding the parsing of UnicodeSet property patterns.
 * <p>
 * WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the
 * Unassigned characters (gc=Cn) are different than in ICU, you MUST call
 * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
 * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
 *
 * @author markdavis
 */
public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable {
    /** Highest code point visited when a property is evaluated character by character. */
    private static final int MAX_CODE_POINT = 0x10FFFF;

    UnicodeRegex unicodeRegex;
    final UnicodeProperty.Factory factory;

    /**
     * Creates a symbol table that resolves property names through the given factory.
     *
     * @param factory source of the {@link UnicodeProperty} objects looked up by name
     */
    public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) {
        // Assign the factory first so that this object is fully usable before
        // a reference to it is handed to the regex helper.
        this.factory = factory;
        unicodeRegex = new UnicodeRegex().setSymbolTable(this);
    }

    /**
     * Resolves a property expression into {@code result}.
     * <p>
     * On top of a plain name/value pair, the following is handled here:
     * <ul>
     * <li>{@code propertyName} may itself contain an operator, either U+2260 (not equal) or ':'.
     * The text after the first operator becomes the value (prefixed to any supplied value with '='),
     * and U+2260 inverts the result.</li>
     * <li>a trailing '!' on the property name inverts the result.</li>
     * <li>when the value is empty, the name is tried in turn as a General_Category value, as a Script
     * value, as a property with the value "Yes", and finally as a property with an empty value.</li>
     * </ul>
     *
     * @param propertyName property name, possibly carrying an operator and value as described above
     * @param propertyValue property value; may be empty
     * @param result set that receives the matching code points; its previous contents are discarded
     * @return true if the expression was resolved
     * @throws IllegalArgumentException if the last fallback cannot resolve the property or value
     */
    @Override
    public boolean applyPropertyAlias(String propertyName,
            String propertyValue, UnicodeSet result) {
        boolean status;
        boolean invert = false;
        int posNotEqual = propertyName.indexOf('\u2260');
        int posColon = propertyName.indexOf(':');
        if (posNotEqual >= 0 || posColon >= 0) {
            // A missing operator is treated as sitting at the end of the string,
            // so the comparisons below pick whichever operator comes first.
            if (posNotEqual < 0) {
                posNotEqual = propertyName.length();
            }
            if (posColon < 0) {
                posColon = propertyName.length();
            }
            int opPos = posNotEqual < posColon ? posNotEqual : posColon;
            String embeddedValue = propertyName.substring(opPos + 1);
            propertyValue = propertyValue.length() == 0
                    ? embeddedValue
                    : embeddedValue + "=" + propertyValue;
            propertyName = propertyName.substring(0, opPos);
            if (posNotEqual < posColon) {
                invert = true;
            }
        }
        if (propertyName.endsWith("!")) {
            propertyName = propertyName.substring(0, propertyName.length() - 1);
            invert = !invert;
        }
        propertyValue = propertyValue.trim();
        if (propertyValue.length() != 0) {
            status = applyPropertyAlias0(propertyName, propertyValue, result);
        } else {
            // Bare name: try the interpretations in order. Only the last one is
            // allowed to propagate its exception to the caller.
            status = tryApply("gc", propertyName, result)
                    || tryApply("sc", propertyName, result)
                    || tryApply(propertyName, "Yes", result)
                    || applyPropertyAlias0(propertyName, "", result);
        }
        if (status && invert) {
            result.complement();
        }
        return status;
    }

    /**
     * Best-effort variant of {@link #applyPropertyAlias0}: a failure is reported as {@code false}
     * instead of an exception, so the caller can move on to the next interpretation.
     */
    private boolean tryApply(String propertyName, String propertyValue, UnicodeSet result) {
        try {
            return applyPropertyAlias0(propertyName, propertyValue, result);
        } catch (Exception ignored) {
            // Deliberately ignored: this interpretation did not fit, the caller tries another.
            return false;
        }
    }

    /**
     * Maps the skeleton of a grouping General_Category value (for example "l" or "letter") to the
     * two-letter categories it stands for. Aliases of one group share the same array.
     */
    static final HashMap<String,String[]> GC_REMAP = new HashMap<String,String[]>();
    static {
        // Populated once at class initialization, so the table is complete
        // before any instance exists and is not rewritten per instance.
        addGcRemap("Cc Cf Cn Co Cs", "c", "other");
        addGcRemap("Ll Lm Lo Lt Lu", "l", "letter");
        addGcRemap("Ll Lt Lu", "lc", "casedletter");
        addGcRemap("Mc Me Mn", "m", "mark");
        addGcRemap("Nd Nl No", "n", "number");
        addGcRemap("Pc Pd Pe Pf Pi Po Ps", "p", "punctuation", "punct");
        addGcRemap("Sc Sk Sm So", "s", "symbol");
        addGcRemap("Zl Zp Zs", "z", "separator");
    }

    /** Registers the space-separated {@code members} under each of the given {@code names}. */
    private static void addGcRemap(String members, String... names) {
        String[] parts = members.split(" ");
        for (String name : names) {
            GC_REMAP.put(name, parts);
        }
    }

    /**
     * Resolves a single, already separated property name and value into {@code result}.
     * <p>
     * Value forms handled:
     * <ul>
     * <li>{@code /regex/} - the value is a regular expression matched against the property values.</li>
     * <li>{@code @cp@} - code points whose property value equals the code point itself.</li>
     * <li>{@code @other@} - code points whose value equals their value for property {@code other}.</li>
     * <li>anything else - a literal value, which must be valid for the property.</li>
     * </ul>
     * For General_Category, grouping values listed in {@link #GC_REMAP} expand to the union of their
     * members. For Age, a match also pulls in the values that order at or before it.
     *
     * @param propertyName name or alias of the property
     * @param propertyValue value expression in one of the forms above
     * @param result set that is cleared and then filled with the matching code points
     * @return true; every failure is reported by exception
     * @throws IllegalArgumentException if the property is unknown or the literal value is not valid
     */
    public boolean applyPropertyAlias0(String propertyName,
            String propertyValue, UnicodeSet result) {
        result.clear();
        UnicodeProperty prop = factory.getProperty(propertyName);
        if (prop == null) {
            // Must be checked before the first use of prop; otherwise an unknown
            // property surfaces as a NullPointerException instead of this message.
            throw new IllegalArgumentException("Illegal property: " + propertyName);
        }
        String canonicalName = prop.getName();
        boolean isAge = UnicodeProperty.equalNames("Age", canonicalName);

        // Hack for special GC values
        if (canonicalName.equals("General_Category")) {
            String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue));
            if (parts != null) {
                for (String part : parts) {
                    prop.getSet(part, result);
                }
                return true;
            }
        }

        PatternMatcher patternMatcher = null;
        if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
            String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1));
            patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);
        }
        UnicodeProperty otherProperty = null;
        boolean testCp = false;
        if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) {
            String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim();
            if ("cp".equalsIgnoreCase(otherPropName)) {
                testCp = true;
            } else {
                otherProperty = factory.getProperty(otherPropName);
            }
        }

        UnicodeSet set;
        if (testCp) {
            set = new UnicodeSet();
            for (int i = 0; i <= MAX_CODE_POINT; ++i) {
                if (UnicodeProperty.equals(i, prop.getValue(i))) {
                    set.add(i);
                }
            }
        } else if (otherProperty != null) {
            set = new UnicodeSet();
            for (int i = 0; i <= MAX_CODE_POINT; ++i) {
                String v1 = prop.getValue(i);
                String v2 = otherProperty.getValue(i);
                if (UnicodeProperty.equals(v1, v2)) {
                    set.add(i);
                }
            }
        } else if (patternMatcher == null) {
            if (!isValid(prop, propertyValue)) {
                throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName
                        + " must be in "
                        + prop.getAvailableValues() + " or in " + prop.getValueAliases());
            }
            if (isAge) {
                // Matches every value v with propertyValue >= v under ComparisonMatcher's comparator.
                // NOTE(review): the comparison is on strings, so multi-digit version components
                // (e.g. "10.0" vs "9.0") may not order numerically - confirm the format of Age values.
                set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq));
            } else {
                set = prop.getSet(propertyValue);
            }
        } else if (isAge) {
            // For each value the regex matches, include every value that does not sort after it.
            set = new UnicodeSet();
            List<String> values = prop.getAvailableValues();
            for (String value : values) {
                if (patternMatcher.matches(value)) {
                    for (String other : values) {
                        if (other.compareTo(value) <= 0) {
                            set.addAll(prop.getSet(other));
                        }
                    }
                }
            }
        } else {
            set = prop.getSet(patternMatcher);
        }
        result.addAll(set);
        return true;
    }

    /** Returns whether {@code propertyValue} is accepted by {@code prop} as a literal value. */
    private boolean isValid(UnicodeProperty prop, String propertyValue) {
        return prop.isValidValue(propertyValue);
    }

    /** Ordering relations understood by {@link ComparisonMatcher}; each reads "pattern RELATION value". */
    public enum Relation {less, leq, equal, geq, greater}

    /**
     * Matches values by comparing them against a fixed pattern string.
     * <p>
     * The comparison is {@code compare(pattern, value)}, so for example {@link Relation#geq} accepts
     * every value that the pattern is greater than or equal to.
     */
    public static class ComparisonMatcher implements PatternMatcher {
        Relation relation;
        static Comparator<String> comparator = new UTF16.StringComparator(true, false,0);

        String pattern;

        /**
         * @param pattern the string each candidate value is compared against
         * @param comparator the relation that must hold between the pattern and a value
         */
        public ComparisonMatcher(String pattern, Relation comparator) {
            this.relation = comparator;
            this.pattern = pattern;
        }

        /**
         * Tests {@code value.toString()} against the pattern.
         *
         * @param value candidate value; must not be null
         * @return true if the configured relation holds between the pattern and the value
         */
        public boolean matches(Object value) {
            int comp = comparator.compare(pattern, value.toString());
            switch (relation) {
            case less: return comp < 0;
            case leq: return comp <= 0;
            case geq: return comp >= 0;
            case greater: return comp > 0;
            case equal:
            default: return comp == 0;
            }
        }

        /**
         * Replaces the pattern that subsequent {@link #matches} calls compare against.
         *
         * @param pattern the new pattern
         * @return this matcher
         */
        public PatternMatcher set(String pattern) {
            this.pattern = pattern;
            return this;
        }
    }
}