190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/*
290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Copyright (C) 2009 The Android Open Source Project
390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov *
490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License");
590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * you may not use this file except in compliance with the License.
690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * You may obtain a copy of the License at
790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov *
890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov *      http://www.apache.org/licenses/LICENSE-2.0
990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov *
1090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Unless required by applicable law or agreed to in writing, software
1190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS,
1290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * See the License for the specific language governing permissions and
1490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * limitations under the License
1590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */
1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts;
1790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
18633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughesimport java.util.Locale;
19d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughesimport java.text.Collator;
20168d5437461d59535cda2b9ccf1ce9a8a5bc8688Elliott Hughesimport java.text.CollationKey;
21d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughesimport java.text.RuleBasedCollator;
2290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
2390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/**
2490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Converts a name to a normalized form by removing all non-letter characters and normalizing
2590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * UNICODE according to http://unicode.org/unicode/reports/tr15
2690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */
2790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikovpublic class NameNormalizer {
2890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
2990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    private static final RuleBasedCollator sCompressingCollator;
3090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    static {
31633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes        sCompressingCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault());
3290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        sCompressingCollator.setStrength(Collator.PRIMARY);
3390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        sCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
3490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
3590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
3690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    private static final RuleBasedCollator sComplexityCollator;
3790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    static {
38633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes        sComplexityCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault());
39d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        sComplexityCollator.setStrength(Collator.SECONDARY);
4090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
4190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
4290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    /**
4390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     * Converts the supplied name to a string that can be used to perform approximate matching
44d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes     * of names.  It ignores non-letter, non-digit characters, and removes accents.
4590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     */
4690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    public static String normalize(String name) {
47633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes        CollationKey key = sCompressingCollator.getCollationKey(lettersAndDigitsOnly(name));
48633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes        return Hex.encodeHex(key.toByteArray(), true);
4990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
5090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
5190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    /**
5290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     * Compares "complexity" of two names, which is determined by the presence
5390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     * of mixed case characters, accents and, if all else is equal, length.
5490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     */
5590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    public static int compareComplexity(String name1, String name2) {
56d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        String clean1 = lettersAndDigitsOnly(name1);
57d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        String clean2 = lettersAndDigitsOnly(name2);
58d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        int diff = sComplexityCollator.compare(clean1, clean2);
59d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        if (diff != 0) {
60d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes            return diff;
61d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        }
62d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        // compareTo sorts uppercase first. We know that there are no non-case
63d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        // differences from the above test, so we can negate here to get the
64d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        // lowercase-first comparison we really want...
65d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes        diff = -clean1.compareTo(clean2);
6690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        if (diff != 0) {
6790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov            return diff;
6890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        }
6990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        return name1.length() - name2.length();
7090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
7190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
7290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    /**
73d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes     * Returns a string containing just the letters and digits from the original string.
7490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov     */
75f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov    private static String lettersAndDigitsOnly(String name) {
7690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        char[] letters = name.toCharArray();
7790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        int length = 0;
7890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        for (int i = 0; i < letters.length; i++) {
7990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov            final char c = letters[i];
80f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov            if (Character.isLetterOrDigit(c)) {
8190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov                letters[length++] = c;
8290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov            }
8390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        }
8490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
8590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        if (length != letters.length) {
8690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov            return new String(letters, 0, length);
8790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        }
8890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov
8990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov        return name;
9090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov    }
9190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov}
92