190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/*
290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Copyright (C) 2009 The Android Open Source Project
390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov *
490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License");
590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * you may not use this file except in compliance with the License.
690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * You may obtain a copy of the License at
790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov *
890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov *      http://www.apache.org/licenses/LICENSE-2.0
990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov *
1090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Unless required by applicable law or agreed to in writing, software
1190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS,
1290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * See the License for the specific language governing permissions and
1490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * limitations under the License
1590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */
1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts;
1790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
1881567f4a0f7c9c338506bd82f4d33e83c2ccf159Makoto Onukiimport com.android.providers.contacts.util.Hex;
19d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onukiimport com.google.common.annotations.VisibleForTesting;
2081567f4a0f7c9c338506bd82f4d33e83c2ccf159Makoto Onuki
21168d5437461d59535cda2b9ccf1ce9a8a5bc8688Elliott Hughesimport java.text.CollationKey;
2238210445730ee04c351c7cc1b3800cfe23e34325Makoto Onukiimport java.text.Collator;
23d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughesimport java.text.RuleBasedCollator;
2438210445730ee04c351c7cc1b3800cfe23e34325Makoto Onukiimport java.util.Locale;
2590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
2690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/**
2790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Converts a name to a normalized form by removing all non-letter characters and normalizing
2890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * UNICODE according to http://unicode.org/unicode/reports/tr15
2990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */
3090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikovpublic class NameNormalizer {
3190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
32d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    private static final Object sCollatorLock = new Object();
33d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki
34d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    private static Locale sCollatorLocale;
35d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki
36d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    private static RuleBasedCollator sCachedCompressingCollator;
37d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    private static RuleBasedCollator sCachedComplexityCollator;
38d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki
39d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    /**
40d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki     * Ensure that the cached collators are for the current locale.
41d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki     */
42d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    private static void ensureCollators() {
43d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        final Locale locale = Locale.getDefault();
44d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        if (locale.equals(sCollatorLocale)) {
45d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki            return;
46d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        }
47d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        sCollatorLocale = locale;
48d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki
49d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        sCachedCompressingCollator = (RuleBasedCollator) Collator.getInstance(locale);
50d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        sCachedCompressingCollator.setStrength(Collator.PRIMARY);
51d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        sCachedCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
52d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki
53d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        sCachedComplexityCollator = (RuleBasedCollator) Collator.getInstance(locale);
54d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        sCachedComplexityCollator.setStrength(Collator.SECONDARY);
55d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    }
56d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki
57d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    @VisibleForTesting
58d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    static RuleBasedCollator getCompressingCollator() {
59d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        synchronized (sCollatorLock) {
60d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki            ensureCollators();
61d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki            return sCachedCompressingCollator;
62d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        }
6390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
6490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
65d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    @VisibleForTesting
66d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki    static RuleBasedCollator getComplexityCollator() {
67d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        synchronized (sCollatorLock) {
68d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki            ensureCollators();
69d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki            return sCachedComplexityCollator;
70d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        }
7190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
7290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
7390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    /**
7490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     * Converts the supplied name to a string that can be used to perform approximate matching
75d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes     * of names.  It ignores non-letter, non-digit characters, and removes accents.
7690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     */
7790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    public static String normalize(String name) {
78d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        CollationKey key = getCompressingCollator().getCollationKey(lettersAndDigitsOnly(name));
79633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes        return Hex.encodeHex(key.toByteArray(), true);
8090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
8190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
8290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    /**
8390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     * Compares "complexity" of two names, which is determined by the presence
8490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     * of mixed case characters, accents and, if all else is equal, length.
8590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     */
8690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    public static int compareComplexity(String name1, String name2) {
87d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        String clean1 = lettersAndDigitsOnly(name1);
88d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        String clean2 = lettersAndDigitsOnly(name2);
89d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki        int diff = getComplexityCollator().compare(clean1, clean2);
90d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        if (diff != 0) {
91d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes            return diff;
92d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        }
93d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        // compareTo sorts uppercase first. We know that there are no non-case
94d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        // differences from the above test, so we can negate here to get the
95d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        // lowercase-first comparison we really want...
96d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        diff = -clean1.compareTo(clean2);
9790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        if (diff != 0) {
9890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov            return diff;
9990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        }
10090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        return name1.length() - name2.length();
10190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
10290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
10390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    /**
104d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes     * Returns a string containing just the letters and digits from the original string.
1053ef26b5979ca194cc0a528825794be0386508ccfJay Shrauner     * Returns empty string if the original string is null.
10690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     */
107f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov    private static String lettersAndDigitsOnly(String name) {
1083ef26b5979ca194cc0a528825794be0386508ccfJay Shrauner        if (name == null) {
1093ef26b5979ca194cc0a528825794be0386508ccfJay Shrauner            return "";
1103ef26b5979ca194cc0a528825794be0386508ccfJay Shrauner        }
11190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        char[] letters = name.toCharArray();
11290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        int length = 0;
11390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        for (int i = 0; i < letters.length; i++) {
11490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov            final char c = letters[i];
115f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov            if (Character.isLetterOrDigit(c)) {
11690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov                letters[length++] = c;
11790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov            }
11890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        }
11990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
12090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        if (length != letters.length) {
12190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov            return new String(letters, 0, length);
12290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        }
12390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
12490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        return name;
12590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
12690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov}
127