NameNormalizer.java revision 81567f4a0f7c9c338506bd82f4d33e83c2ccf159
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16package com.android.providers.contacts;
17
18import com.android.providers.contacts.util.Hex;
19
20import java.util.Locale;
21import java.text.Collator;
22import java.text.CollationKey;
23import java.text.RuleBasedCollator;
24
25/**
26 * Converts a name to a normalized form by removing all non-letter characters and normalizing
27 * UNICODE according to http://unicode.org/unicode/reports/tr15
28 */
29public class NameNormalizer {
30
31    private static final RuleBasedCollator sCompressingCollator;
32    static {
33        sCompressingCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault());
34        sCompressingCollator.setStrength(Collator.PRIMARY);
35        sCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
36    }
37
38    private static final RuleBasedCollator sComplexityCollator;
39    static {
40        sComplexityCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault());
41        sComplexityCollator.setStrength(Collator.SECONDARY);
42    }
43
44    /**
45     * Converts the supplied name to a string that can be used to perform approximate matching
46     * of names.  It ignores non-letter, non-digit characters, and removes accents.
47     */
48    public static String normalize(String name) {
49        CollationKey key = sCompressingCollator.getCollationKey(lettersAndDigitsOnly(name));
50        return Hex.encodeHex(key.toByteArray(), true);
51    }
52
53    /**
54     * Compares "complexity" of two names, which is determined by the presence
55     * of mixed case characters, accents and, if all else is equal, length.
56     */
57    public static int compareComplexity(String name1, String name2) {
58        String clean1 = lettersAndDigitsOnly(name1);
59        String clean2 = lettersAndDigitsOnly(name2);
60        int diff = sComplexityCollator.compare(clean1, clean2);
61        if (diff != 0) {
62            return diff;
63        }
64        // compareTo sorts uppercase first. We know that there are no non-case
65        // differences from the above test, so we can negate here to get the
66        // lowercase-first comparison we really want...
67        diff = -clean1.compareTo(clean2);
68        if (diff != 0) {
69            return diff;
70        }
71        return name1.length() - name2.length();
72    }
73
74    /**
75     * Returns a string containing just the letters and digits from the original string.
76     */
77    private static String lettersAndDigitsOnly(String name) {
78        char[] letters = name.toCharArray();
79        int length = 0;
80        for (int i = 0; i < letters.length; i++) {
81            final char c = letters[i];
82            if (Character.isLetterOrDigit(c)) {
83                letters[length++] = c;
84            }
85        }
86
87        if (length != letters.length) {
88            return new String(letters, 0, length);
89        }
90
91        return name;
92    }
93}
94