NameNormalizer.java revision 168d5437461d59535cda2b9ccf1ce9a8a5bc8688
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16package com.android.providers.contacts;
17
18import com.ibm.icu4jni.text.CollationAttribute;
19import com.ibm.icu4jni.text.Collator;
20import com.ibm.icu4jni.text.RuleBasedCollator;
21import java.util.Locale;
22import java.text.CollationKey;
23
24/**
25 * Converts a name to a normalized form by removing all non-letter characters and normalizing
26 * UNICODE according to http://unicode.org/unicode/reports/tr15
27 */
28public class NameNormalizer {
29
30    private static final RuleBasedCollator sCompressingCollator;
31    static {
32        sCompressingCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault());
33        sCompressingCollator.setStrength(Collator.PRIMARY);
34        sCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
35    }
36
37    private static final RuleBasedCollator sComplexityCollator;
38    static {
39        sComplexityCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault());
40        sComplexityCollator.setStrength(Collator.TERTIARY);
41        sComplexityCollator.setAttribute(CollationAttribute.CASE_FIRST,
42                CollationAttribute.VALUE_LOWER_FIRST);
43    }
44
45    /**
46     * Converts the supplied name to a string that can be used to perform approximate matching
47     * of names.  It ignores non-letter characters and removes accents.
48     */
49    public static String normalize(String name) {
50        CollationKey key = sCompressingCollator.getCollationKey(lettersAndDigitsOnly(name));
51        return Hex.encodeHex(key.toByteArray(), true);
52    }
53
54    /**
55     * Compares "complexity" of two names, which is determined by the presence
56     * of mixed case characters, accents and, if all else is equal, length.
57     */
58    public static int compareComplexity(String name1, String name2) {
59        int diff = sComplexityCollator.compare(lettersAndDigitsOnly(name1),
60                lettersAndDigitsOnly(name2));
61        if (diff != 0) {
62            return diff;
63        }
64
65        return name1.length() - name2.length();
66    }
67
68    /**
69     * Returns a string containing just the letters from the original string.
70     */
71    private static String lettersAndDigitsOnly(String name) {
72        char[] letters = name.toCharArray();
73        int length = 0;
74        for (int i = 0; i < letters.length; i++) {
75            final char c = letters[i];
76            if (Character.isLetterOrDigit(c)) {
77                letters[length++] = c;
78            }
79        }
80
81        if (length != letters.length) {
82            return new String(letters, 0, length);
83        }
84
85        return name;
86    }
87}
88