1/*
2 *******************************************************************************
3 * Copyright (C) 1996-2012, Google, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 */
7package com.ibm.icu.dev.util;
8
9import java.util.Comparator;
10import java.util.HashMap;
11import java.util.List;
12
13import com.ibm.icu.dev.util.UnicodeProperty.PatternMatcher;
14import com.ibm.icu.impl.UnicodeRegex;
15import com.ibm.icu.text.UTF16;
16import com.ibm.icu.text.UnicodeSet;
17
18/**
19 * Allows for overriding the parsing of UnicodeSet property patterns.
20 * <p>
21 * WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the
 * Unassigned characters (gc=Cn) are different from those in ICU, you MUST call
23 * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
24 * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
25 *
26 * @author markdavis
27 */
28public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable {
29    UnicodeRegex unicodeRegex;
30    final UnicodeProperty.Factory factory;
31
32    public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) {
33      unicodeRegex = new UnicodeRegex().setSymbolTable(this);
34      this.factory = factory;
35    }
36
37
38    //    public boolean applyPropertyAlias0(String propertyName,
39    //            String propertyValue, UnicodeSet result) {
40    //      if (!propertyName.contains("*")) {
41    //        return applyPropertyAlias(propertyName, propertyValue, result);
42    //      }
43    //      String[] propertyNames = propertyName.split("[*]");
44    //      for (int i = propertyNames.length - 1; i >= 0; ++i) {
45    //        String pname = propertyNames[i];
46    //
47    //      }
48    //      return null;
49    //    }
50
51    public boolean applyPropertyAlias(String propertyName,
52            String propertyValue, UnicodeSet result) {
53      boolean status = false;
54      boolean invert = false;
55      int posNotEqual = propertyName.indexOf('\u2260');
56      int posColon = propertyName.indexOf(':');
57      if (posNotEqual >= 0 || posColon >= 0) {
58          if (posNotEqual < 0) posNotEqual = propertyName.length();
59          if (posColon < 0) posColon = propertyName.length();
60          int opPos = posNotEqual < posColon ? posNotEqual : posColon;
61          propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1)
62                  : propertyName.substring(opPos+1) + "=" + propertyValue;
63          propertyName = propertyName.substring(0,opPos);
64          if (posNotEqual < posColon) {
65              invert = true;
66          }
67      }
68      if (propertyName.endsWith("!")) {
69        propertyName = propertyName.substring(0, propertyName.length() - 1);
70        invert = !invert;
71      }
72      propertyValue = propertyValue.trim();
73      if (propertyValue.length() != 0) {
74        status = applyPropertyAlias0(propertyName, propertyValue, result);
75      } else {
76        try {
77          status = applyPropertyAlias0("gc", propertyName, result);
78        } catch (Exception e) {};
79        if (!status) {
80          try {
81            status = applyPropertyAlias0("sc", propertyName, result);
82          } catch (Exception e) {};
83          if (!status) {
84            try {
85              status = applyPropertyAlias0(propertyName, "Yes", result);
86            } catch (Exception e) {};
87            if (!status) {
88              status = applyPropertyAlias0(propertyName, "", result);
89            }
90          }
91        }
92      }
93      if (status && invert) {
94        result.complement();
95      }
96      return status;
97    }
98
99    static final HashMap<String,String[]> GC_REMAP = new HashMap();
100    {
101        GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" "));
102        GC_REMAP.put("other", GC_REMAP.get("c"));
103
104        GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" "));
105        GC_REMAP.put("letter", GC_REMAP.get("l"));
106
107        GC_REMAP.put("lc", "Ll Lt Lu".split(" "));
108        GC_REMAP.put("casedletter", GC_REMAP.get("lc"));
109
110        GC_REMAP.put("m", "Mc Me Mn".split(" "));
111        GC_REMAP.put("mark", GC_REMAP.get("m"));
112
113        GC_REMAP.put("n", "Nd Nl No".split(" "));
114        GC_REMAP.put("number", GC_REMAP.get("n"));
115
116        GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" "));
117        GC_REMAP.put("punctuation", GC_REMAP.get("p"));
118        GC_REMAP.put("punct", GC_REMAP.get("p"));
119
120        GC_REMAP.put("s", "Sc Sk Sm So".split(" "));
121        GC_REMAP.put("symbol", GC_REMAP.get("s"));
122
123        GC_REMAP.put("z", "Zl Zp Zs".split(" "));
124        GC_REMAP.put("separator", GC_REMAP.get("z"));
125    }
126
127    public boolean applyPropertyAlias0(String propertyName,
128            String propertyValue, UnicodeSet result) {
129      result.clear();
130      UnicodeProperty prop = factory.getProperty(propertyName);
131      String canonicalName = prop.getName();
132      boolean isAge = UnicodeProperty.equalNames("Age", canonicalName);
133
134      // Hack for special GC values
135      if (canonicalName.equals("General_Category")) {
136          String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue));
137          if (parts != null) {
138              for (String part : parts) {
139                  prop.getSet(part, result);
140              }
141              return true;
142          }
143      }
144
145      PatternMatcher patternMatcher = null;
146      if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
147        String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1));
148        patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);
149      }
150      UnicodeProperty otherProperty = null;
151      boolean testCp = false;
152      if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) {
153        String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim();
154        if ("cp".equalsIgnoreCase(otherPropName)) {
155          testCp = true;
156        } else {
157          otherProperty = factory.getProperty(otherPropName);
158        }
159      }
160      if (prop != null) {
161        UnicodeSet set;
162        if (testCp) {
163          set = new UnicodeSet();
164          for (int i = 0; i <= 0x10FFFF; ++i) {
165            if (UnicodeProperty.equals(i, prop.getValue(i))) {
166              set.add(i);
167            }
168          }
169        } else if (otherProperty != null) {
170          set = new UnicodeSet();
171          for (int i = 0; i <= 0x10FFFF; ++i) {
172            String v1 = prop.getValue(i);
173            String v2 = otherProperty.getValue(i);
174            if (UnicodeProperty.equals(v1, v2)) {
175              set.add(i);
176            }
177          }
178        } else if (patternMatcher == null) {
179          if (!isValid(prop, propertyValue)) {
180            throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName
181                    + " must be in "
182                    + prop.getAvailableValues() + " or in " + prop.getValueAliases());
183          }
184          if (isAge) {
185            set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq));
186          } else {
187            set = prop.getSet(propertyValue);
188          }
189        } else if (isAge) {
190          set = new UnicodeSet();
191          List<String> values = prop.getAvailableValues();
192          for (String value : values) {
193            if (patternMatcher.matches(value)) {
194              for (String other : values) {
195                if (other.compareTo(value) <= 0) {
196                  set.addAll(prop.getSet(other));
197                }
198              }
199            }
200          }
201        } else {
202          set = prop.getSet(patternMatcher);
203        }
204        result.addAll(set);
205        return true;
206      }
207      throw new IllegalArgumentException("Illegal property: " + propertyName);
208    }
209
210
211
212    private boolean isValid(UnicodeProperty prop, String propertyValue) {
213//      if (prop.getName().equals("General_Category")) {
214//        if (propertyValue)
215//      }
216      return prop.isValidValue(propertyValue);
217    }
218
219    public enum Relation {less, leq, equal, geq, greater}
220
221    public static class ComparisonMatcher implements PatternMatcher {
222        Relation relation;
223        static Comparator comparator = new UTF16.StringComparator(true, false,0);
224
225        String pattern;
226
227        public ComparisonMatcher(String pattern, Relation comparator) {
228          this.relation = comparator;
229          this.pattern = pattern;
230        }
231
232        public boolean matches(Object value) {
233          int comp = comparator.compare(pattern, value.toString());
234          switch (relation) {
235          case less: return comp < 0;
236          case leq: return comp <= 0;
237          default: return comp == 0;
238          case geq: return comp >= 0;
239          case greater: return comp > 0;
240          }
241        }
242
243        public PatternMatcher set(String pattern) {
244          this.pattern = pattern;
245          return this;
246        }
247      }
248  }
249