/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.i18n.addressinput;
18
19import com.android.i18n.addressinput.LookupKey.ScriptType;
20
21import java.util.EnumSet;
22import java.util.HashSet;
23import java.util.Map;
24import java.util.Set;
25import java.util.regex.Pattern;
26
27/**
28 * Accesses address verification data used to verify components of an address.
29 * <p> Not all fields require all types of validation, although this could be done. In particular,
30 * the current implementation only provides known value verification for the hierarchical fields,
31 * and only provides format and match verification for the postal code field.
32 */
33public class FieldVerifier {
34    // Node data values are delimited by this symbol.
35    private static final String DATA_DELIMITER = "~";
36    // Keys are built up using this delimiter: eg data/US, data/US/CA.
37    private static final String KEY_DELIMITER = "/";
38
39    private String mId;
40    private DataSource mDataSource;
41
42    private Set<AddressField> mPossibleFields;
43    private Set<AddressField> mRequired;
44    // Known values. Can be either a key, a name in Latin, or a name in native script.
45    private Map<String, String> mCandidateValues;
46
47    // Keys for the subnodes of this verifier. For example, a key for the US would be CA, since
48    // there is a sub-verifier with the ID "data/US/CA". Keys may be the local names of the
49    // locations in the next level of the hierarchy, or the abbreviations if suitable abbreviations
50    // exist.
51    private String[] mKeys;
52    // Names in Latin. These are only populated if the native/local names are in a script other than
53    // latin.
54    private String[] mLatinNames;
55    // Names in native script.
56    private String[] mLocalNames;
57
58    // Pattern representing the format of a postal code number.
59    private Pattern mFormat;
60    // Defines the valid range of a postal code number.
61    private Pattern mMatch;
62
63    /**
64     * Creates the root field verifier for a particular data source.
65     */
66    public FieldVerifier(DataSource dataSource) {
67        mDataSource = dataSource;
68        populateRootVerifier();
69    }
70
71    /**
72     * Creates a field verifier based on its parent and on the new data for this node supplied by
73     * nodeData (which may be null).
74     */
75    private FieldVerifier(FieldVerifier parent, AddressVerificationNodeData nodeData) {
76        // Most information is inherited from the parent.
77        mPossibleFields = parent.mPossibleFields;
78        mRequired = parent.mRequired;
79        mDataSource = parent.mDataSource;
80        mFormat = parent.mFormat;
81        mMatch = parent.mMatch;
82        // Here we add in any overrides from this particular node as well as information such as
83        // localNames, latinNames and keys.
84        populate(nodeData);
85        // candidateValues should never be inherited from the parent, but built up from the
86        // localNames in this node.
87        mCandidateValues = Util.buildNameToKeyMap(mKeys, mLocalNames, mLatinNames);
88    }
89
90    /**
91     * Sets possibleFieldsUsed, required, keys and candidateValues for the root field verifier. This
92     * is a little messy at the moment since not all the appropriate information is actually under
93     * the root "data" node in the metadata. For example, "possibleFields" and "required" are not
94     * present there.
95     */
96    private void populateRootVerifier() {
97        mId = "data";
98        // Keys come from the countries under "data".
99        AddressVerificationNodeData rootNode = mDataSource.getDefaultData("data");
100        if (rootNode.containsKey(AddressDataKey.COUNTRIES)) {
101            mKeys = rootNode.get(AddressDataKey.COUNTRIES).split(DATA_DELIMITER);
102        }
103        // candidateValues is just the set of keys.
104        mCandidateValues = Util.buildNameToKeyMap(mKeys, null, null);
105
106        // Copy "possibleFieldsUsed" and "required" from the defaults here for bootstrapping.
107        // TODO: Investigate a cleaner way of doing this - maybe we should populate "data" with this
108        // information instead.
109        AddressVerificationNodeData defaultZZ = mDataSource.getDefaultData("data/ZZ");
110        mPossibleFields = new HashSet<AddressField>();
111        if (defaultZZ.containsKey(AddressDataKey.FMT)) {
112            mPossibleFields = parseAddressFields(defaultZZ.get(AddressDataKey.FMT));
113        }
114        mRequired = new HashSet<AddressField>();
115        if (defaultZZ.containsKey(AddressDataKey.REQUIRE)) {
116            mRequired = parseRequireString(defaultZZ.get(AddressDataKey.REQUIRE));
117        }
118    }
119
120    /**
121     * Populates this verifier with data from the node data passed in. This may be null.
122     */
123    private void populate(AddressVerificationNodeData nodeData) {
124        if (nodeData == null) {
125            return;
126        }
127        if (nodeData.containsKey(AddressDataKey.ID)) {
128            mId = nodeData.get(AddressDataKey.ID);
129        }
130        if (nodeData.containsKey(AddressDataKey.SUB_KEYS)) {
131            mKeys = nodeData.get(AddressDataKey.SUB_KEYS).split(DATA_DELIMITER);
132        }
133        if (nodeData.containsKey(AddressDataKey.SUB_LNAMES)) {
134            mLatinNames = nodeData.get(AddressDataKey.SUB_LNAMES).split(DATA_DELIMITER);
135        }
136        if (nodeData.containsKey(AddressDataKey.SUB_NAMES)) {
137            mLocalNames = nodeData.get(AddressDataKey.SUB_NAMES).split(DATA_DELIMITER);
138        }
139        if (nodeData.containsKey(AddressDataKey.FMT)) {
140            mPossibleFields = parseAddressFields(nodeData.get(AddressDataKey.FMT));
141        }
142        if (nodeData.containsKey(AddressDataKey.REQUIRE)) {
143            mRequired = parseRequireString(nodeData.get(AddressDataKey.REQUIRE));
144        }
145        if (nodeData.containsKey(AddressDataKey.XZIP)) {
146            mFormat = Pattern.compile(nodeData.get(AddressDataKey.XZIP), Pattern.CASE_INSENSITIVE);
147        }
148        if (nodeData.containsKey(AddressDataKey.ZIP)) {
149            // This key has two different meanings, depending on whether this is a country-level key
150            // or not.
151            if (isCountryKey()) {
152                mFormat = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
153                                          Pattern.CASE_INSENSITIVE);
154            } else {
155                mMatch = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
156                                         Pattern.CASE_INSENSITIVE);
157            }
158        }
159        // If there are latin names but no local names, and there are the same number of latin names
160        // as there are keys, then we assume the local names are the same as the keys.
161        if (mKeys != null && mLocalNames == null && mLatinNames != null &&
162            mKeys.length == mLatinNames.length) {
163            mLocalNames = mKeys;
164        }
165    }
166
167    FieldVerifier refineVerifier(String sublevel) {
168        if (Util.trimToNull(sublevel) == null) {
169            return new FieldVerifier(this, null);
170        }
171        // If the parent node didn't exist, then the subLevelName will start with "null".
172        String subLevelName = mId + KEY_DELIMITER + sublevel;
173        // For names with no Latin equivalent, we can look up the sublevel name directly.
174        AddressVerificationNodeData nodeData = mDataSource.get(subLevelName);
175        if (nodeData != null) {
176            return new FieldVerifier(this, nodeData);
177        }
178        // If that failed, then we try to look up the local name equivalent of this latin name.
179        // First check these exist.
180        if (mLatinNames == null) {
181            return new FieldVerifier(this, null);
182        }
183        for (int n = 0; n < mLatinNames.length; n++) {
184            if (mLatinNames[n].equalsIgnoreCase(sublevel)) {
185                // We found a match - we should try looking up a key with the local name at the same
186                // index.
187                subLevelName = mId + KEY_DELIMITER + mLocalNames[n];
188                nodeData = mDataSource.get(subLevelName);
189                if (nodeData != null) {
190                    return new FieldVerifier(this, nodeData);
191                }
192            }
193        }
194        // No sub-verifiers were found.
195        return new FieldVerifier(this, null);
196    }
197
198    /**
199     * Returns the ID of this verifier.
200     */
201    @Override
202    public String toString() {
203        return mId;
204    }
205
206    /**
207     * Checks a value in a particular script for a particular field to see if it causes the problem
208     * specified. If so, this problem is added to the AddressProblems collection passed in. Returns
209     * true if no problem was found.
210     */
211    protected boolean check(ScriptType script, AddressProblemType problem, AddressField field,
212            String value, AddressProblems problems) {
213        boolean problemFound = false;
214
215        String trimmedValue = Util.trimToNull(value);
216        switch (problem) {
217            case USING_UNUSED_FIELD:
218                if (trimmedValue != null && !mPossibleFields.contains(field)) {
219                    problemFound = true;
220                }
221                break;
222            case MISSING_REQUIRED_FIELD:
223                if (mRequired.contains(field) && trimmedValue == null) {
224                    problemFound = true;
225                }
226                break;
227            case UNKNOWN_VALUE:
228                // An empty string will never be an UNKNOWN_VALUE. It is invalid
229                // only when it appears in a required field (In that case it will
230                // be reported as MISSING_REQUIRED_FIELD).
231                if (trimmedValue == null) {
232                    break;
233                }
234                problemFound = !isKnownInScript(script, trimmedValue);
235                break;
236            case UNRECOGNIZED_FORMAT:
237                if (trimmedValue != null && mFormat != null &&
238                        !mFormat.matcher(trimmedValue).matches()) {
239                    problemFound = true;
240                }
241                break;
242            case MISMATCHING_VALUE:
243                if (trimmedValue != null && mMatch != null &&
244                        !mMatch.matcher(trimmedValue).lookingAt()) {
245                    problemFound = true;
246                }
247                break;
248            default:
249                throw new RuntimeException("Unknown problem: " + problem);
250        }
251        if (problemFound) {
252            problems.add(field, problem);
253        }
254        return !problemFound;
255    }
256
257    /**
258     * Checks the value of a particular field in a particular script against the known values for
259     * this field. If script is null, it checks both the local and the latin values. Otherwise it
260     * checks only the values in the script specified.
261     */
262    private boolean isKnownInScript(ScriptType script, String value) {
263        String trimmedValue = Util.trimToNull(value);
264        Util.checkNotNull(trimmedValue);
265        if (script == null) {
266            return (mCandidateValues == null ||
267                    mCandidateValues.containsKey(trimmedValue.toLowerCase()));
268        }
269        // Otherwise, if we know the script, we want to restrict the candidates to only names in
270        // that script.
271        String[] namesToConsider = (script == ScriptType.LATIN) ? mLatinNames : mLocalNames;
272        Set<String> candidates = new HashSet<String>();
273        if (namesToConsider != null) {
274            for (String name : namesToConsider) {
275                candidates.add(name.toLowerCase());
276            }
277        }
278        if (mKeys != null) {
279            for (String name : mKeys) {
280                candidates.add(name.toLowerCase());
281            }
282        }
283
284        if (candidates.size() == 0 || trimmedValue == null) {
285            return true;
286        }
287
288        return candidates.contains(value.toLowerCase());
289    }
290
291    /**
292     * Parses the value of the "fmt" key in the data to see which fields are used for a particular
293     * country. Returns a list of all fields found. Country is always assumed to be present. Skips
294     * characters that indicate new-lines in the format information, as well as any characters not
295     * escaped with "%".
296     */
297    private static Set<AddressField> parseAddressFields(String value) {
298        EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY);
299        boolean escaped = false;
300        for (char c : value.toCharArray()) {
301            if (escaped) {
302                escaped = false;
303                if (c == 'n') {
304                    continue;
305                }
306                AddressField f = AddressField.of(c);
307                if (f == null) {
308                    throw new RuntimeException(
309                            "Unrecognized character '" + c + "' in format pattern: " + value);
310                }
311                result.add(f);
312            } else if (c == '%') {
313                escaped = true;
314            }
315        }
316        // These fields are not mentioned in the metadata at the moment since there is an effort to
317        // move away from STREET_ADDRESS and use these fields instead. This means they have to be
318        // removed here.
319        result.remove(AddressField.ADDRESS_LINE_1);
320        result.remove(AddressField.ADDRESS_LINE_2);
321
322        return result;
323    }
324
325    /**
326     * Parses the value of the "required" key in the data. Adds country as well as any other field
327     * mentioned in the string.
328     */
329    private static Set<AddressField> parseRequireString(String value) {
330        // Country is always required
331        EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY);
332
333        for (char c : value.toCharArray()) {
334            AddressField f = AddressField.of(c);
335            if (f == null) {
336                throw new RuntimeException("Unrecognized character '" + c + "' in require pattern: "
337                        + value);
338            }
339            result.add(f);
340        }
341        // These fields are not mentioned in the metadata at the moment since there is an effort to
342        // move away from STREET_ADDRESS and use these fields instead. This means they have to be
343        // removed here.
344        result.remove(AddressField.ADDRESS_LINE_1);
345        result.remove(AddressField.ADDRESS_LINE_2);
346
347        return result;
348    }
349
350    /**
351     * Returns true if this key represents a country. We assume all keys with only one delimiter are
352     * at the country level (such as "data/US").
353     */
354    private boolean isCountryKey() {
355        Util.checkNotNull(mId, "Cannot use null as key");
356        return mId.split(KEY_DELIMITER).length == 2;
357    }
358}
359