/* * Copyright (C) 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.i18n.addressinput; import com.android.i18n.addressinput.LookupKey.ScriptType; import java.util.EnumSet; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; /** * Accesses address verification data used to verify components of an address. *

Not all fields require all types of validation, although this could be done. In particular, * the current implementation only provides known value verification for the hierarchical fields, * and only provides format and match verification for the postal code field. */ public class FieldVerifier { // Node data values are delimited by this symbol. private static final String DATA_DELIMITER = "~"; // Keys are built up using this delimiter: eg data/US, data/US/CA. private static final String KEY_DELIMITER = "/"; private String mId; private DataSource mDataSource; private Set mPossibleFields; private Set mRequired; // Known values. Can be either a key, a name in Latin, or a name in native script. private Map mCandidateValues; // Keys for the subnodes of this verifier. For example, a key for the US would be CA, since // there is a sub-verifier with the ID "data/US/CA". Keys may be the local names of the // locations in the next level of the hierarchy, or the abbreviations if suitable abbreviations // exist. private String[] mKeys; // Names in Latin. These are only populated if the native/local names are in a script other than // latin. private String[] mLatinNames; // Names in native script. private String[] mLocalNames; // Pattern representing the format of a postal code number. private Pattern mFormat; // Defines the valid range of a postal code number. private Pattern mMatch; /** * Creates the root field verifier for a particular data source. */ public FieldVerifier(DataSource dataSource) { mDataSource = dataSource; populateRootVerifier(); } /** * Creates a field verifier based on its parent and on the new data for this node supplied by * nodeData (which may be null). */ private FieldVerifier(FieldVerifier parent, AddressVerificationNodeData nodeData) { // Most information is inherited from the parent. mPossibleFields = parent.mPossibleFields; mRequired = parent.mRequired; mDataSource = parent.mDataSource; mFormat = parent.mFormat; mMatch = parent.mMatch; // Here we add in any overrides from this particular node as well as information such as // localNames, latinNames and keys. populate(nodeData); // candidateValues should never be inherited from the parent, but built up from the // localNames in this node. mCandidateValues = Util.buildNameToKeyMap(mKeys, mLocalNames, mLatinNames); } /** * Sets possibleFieldsUsed, required, keys and candidateValues for the root field verifier. This * is a little messy at the moment since not all the appropriate information is actually under * the root "data" node in the metadata. For example, "possibleFields" and "required" are not * present there. */ private void populateRootVerifier() { mId = "data"; // Keys come from the countries under "data". AddressVerificationNodeData rootNode = mDataSource.getDefaultData("data"); if (rootNode.containsKey(AddressDataKey.COUNTRIES)) { mKeys = rootNode.get(AddressDataKey.COUNTRIES).split(DATA_DELIMITER); } // candidateValues is just the set of keys. mCandidateValues = Util.buildNameToKeyMap(mKeys, null, null); // Copy "possibleFieldsUsed" and "required" from the defaults here for bootstrapping. // TODO: Investigate a cleaner way of doing this - maybe we should populate "data" with this // information instead. AddressVerificationNodeData defaultZZ = mDataSource.getDefaultData("data/ZZ"); mPossibleFields = new HashSet(); if (defaultZZ.containsKey(AddressDataKey.FMT)) { mPossibleFields = parseAddressFields(defaultZZ.get(AddressDataKey.FMT)); } mRequired = new HashSet(); if (defaultZZ.containsKey(AddressDataKey.REQUIRE)) { mRequired = parseRequireString(defaultZZ.get(AddressDataKey.REQUIRE)); } } /** * Populates this verifier with data from the node data passed in. This may be null. */ private void populate(AddressVerificationNodeData nodeData) { if (nodeData == null) { return; } if (nodeData.containsKey(AddressDataKey.ID)) { mId = nodeData.get(AddressDataKey.ID); } if (nodeData.containsKey(AddressDataKey.SUB_KEYS)) { mKeys = nodeData.get(AddressDataKey.SUB_KEYS).split(DATA_DELIMITER); } if (nodeData.containsKey(AddressDataKey.SUB_LNAMES)) { mLatinNames = nodeData.get(AddressDataKey.SUB_LNAMES).split(DATA_DELIMITER); } if (nodeData.containsKey(AddressDataKey.SUB_NAMES)) { mLocalNames = nodeData.get(AddressDataKey.SUB_NAMES).split(DATA_DELIMITER); } if (nodeData.containsKey(AddressDataKey.FMT)) { mPossibleFields = parseAddressFields(nodeData.get(AddressDataKey.FMT)); } if (nodeData.containsKey(AddressDataKey.REQUIRE)) { mRequired = parseRequireString(nodeData.get(AddressDataKey.REQUIRE)); } if (nodeData.containsKey(AddressDataKey.XZIP)) { mFormat = Pattern.compile(nodeData.get(AddressDataKey.XZIP), Pattern.CASE_INSENSITIVE); } if (nodeData.containsKey(AddressDataKey.ZIP)) { // This key has two different meanings, depending on whether this is a country-level key // or not. if (isCountryKey()) { mFormat = Pattern.compile(nodeData.get(AddressDataKey.ZIP), Pattern.CASE_INSENSITIVE); } else { mMatch = Pattern.compile(nodeData.get(AddressDataKey.ZIP), Pattern.CASE_INSENSITIVE); } } // If there are latin names but no local names, and there are the same number of latin names // as there are keys, then we assume the local names are the same as the keys. if (mKeys != null && mLocalNames == null && mLatinNames != null && mKeys.length == mLatinNames.length) { mLocalNames = mKeys; } } FieldVerifier refineVerifier(String sublevel) { if (Util.trimToNull(sublevel) == null) { return new FieldVerifier(this, null); } // If the parent node didn't exist, then the subLevelName will start with "null". String subLevelName = mId + KEY_DELIMITER + sublevel; // For names with no Latin equivalent, we can look up the sublevel name directly. AddressVerificationNodeData nodeData = mDataSource.get(subLevelName); if (nodeData != null) { return new FieldVerifier(this, nodeData); } // If that failed, then we try to look up the local name equivalent of this latin name. // First check these exist. if (mLatinNames == null) { return new FieldVerifier(this, null); } for (int n = 0; n < mLatinNames.length; n++) { if (mLatinNames[n].equalsIgnoreCase(sublevel)) { // We found a match - we should try looking up a key with the local name at the same // index. subLevelName = mId + KEY_DELIMITER + mLocalNames[n]; nodeData = mDataSource.get(subLevelName); if (nodeData != null) { return new FieldVerifier(this, nodeData); } } } // No sub-verifiers were found. return new FieldVerifier(this, null); } /** * Returns the ID of this verifier. */ @Override public String toString() { return mId; } /** * Checks a value in a particular script for a particular field to see if it causes the problem * specified. If so, this problem is added to the AddressProblems collection passed in. Returns * true if no problem was found. */ protected boolean check(ScriptType script, AddressProblemType problem, AddressField field, String value, AddressProblems problems) { boolean problemFound = false; String trimmedValue = Util.trimToNull(value); switch (problem) { case USING_UNUSED_FIELD: if (trimmedValue != null && !mPossibleFields.contains(field)) { problemFound = true; } break; case MISSING_REQUIRED_FIELD: if (mRequired.contains(field) && trimmedValue == null) { problemFound = true; } break; case UNKNOWN_VALUE: // An empty string will never be an UNKNOWN_VALUE. It is invalid // only when it appears in a required field (In that case it will // be reported as MISSING_REQUIRED_FIELD). if (trimmedValue == null) { break; } problemFound = !isKnownInScript(script, trimmedValue); break; case UNRECOGNIZED_FORMAT: if (trimmedValue != null && mFormat != null && !mFormat.matcher(trimmedValue).matches()) { problemFound = true; } break; case MISMATCHING_VALUE: if (trimmedValue != null && mMatch != null && !mMatch.matcher(trimmedValue).lookingAt()) { problemFound = true; } break; default: throw new RuntimeException("Unknown problem: " + problem); } if (problemFound) { problems.add(field, problem); } return !problemFound; } /** * Checks the value of a particular field in a particular script against the known values for * this field. If script is null, it checks both the local and the latin values. Otherwise it * checks only the values in the script specified. */ private boolean isKnownInScript(ScriptType script, String value) { String trimmedValue = Util.trimToNull(value); Util.checkNotNull(trimmedValue); if (script == null) { return (mCandidateValues == null || mCandidateValues.containsKey(trimmedValue.toLowerCase())); } // Otherwise, if we know the script, we want to restrict the candidates to only names in // that script. String[] namesToConsider = (script == ScriptType.LATIN) ? mLatinNames : mLocalNames; Set candidates = new HashSet(); if (namesToConsider != null) { for (String name : namesToConsider) { candidates.add(name.toLowerCase()); } } if (mKeys != null) { for (String name : mKeys) { candidates.add(name.toLowerCase()); } } if (candidates.size() == 0 || trimmedValue == null) { return true; } return candidates.contains(value.toLowerCase()); } /** * Parses the value of the "fmt" key in the data to see which fields are used for a particular * country. Returns a list of all fields found. Country is always assumed to be present. Skips * characters that indicate new-lines in the format information, as well as any characters not * escaped with "%". */ private static Set parseAddressFields(String value) { EnumSet result = EnumSet.of(AddressField.COUNTRY); boolean escaped = false; for (char c : value.toCharArray()) { if (escaped) { escaped = false; if (c == 'n') { continue; } AddressField f = AddressField.of(c); if (f == null) { throw new RuntimeException( "Unrecognized character '" + c + "' in format pattern: " + value); } result.add(f); } else if (c == '%') { escaped = true; } } // These fields are not mentioned in the metadata at the moment since there is an effort to // move away from STREET_ADDRESS and use these fields instead. This means they have to be // removed here. result.remove(AddressField.ADDRESS_LINE_1); result.remove(AddressField.ADDRESS_LINE_2); return result; } /** * Parses the value of the "required" key in the data. Adds country as well as any other field * mentioned in the string. */ private static Set parseRequireString(String value) { // Country is always required EnumSet result = EnumSet.of(AddressField.COUNTRY); for (char c : value.toCharArray()) { AddressField f = AddressField.of(c); if (f == null) { throw new RuntimeException("Unrecognized character '" + c + "' in require pattern: " + value); } result.add(f); } // These fields are not mentioned in the metadata at the moment since there is an effort to // move away from STREET_ADDRESS and use these fields instead. This means they have to be // removed here. result.remove(AddressField.ADDRESS_LINE_1); result.remove(AddressField.ADDRESS_LINE_2); return result; } /** * Returns true if this key represents a country. We assume all keys with only one delimiter are * at the country level (such as "data/US"). */ private boolean isCountryKey() { Util.checkNotNull(mId, "Cannot use null as key"); return mId.split(KEY_DELIMITER).length == 2; } }