190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/* 290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Copyright (C) 2009 The Android Open Source Project 390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * 490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License"); 590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * you may not use this file except in compliance with the License. 690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * You may obtain a copy of the License at 790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * 890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * http://www.apache.org/licenses/LICENSE-2.0 990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * 1090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Unless required by applicable law or agreed to in writing, software 1190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS, 1290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * See the License for the specific language governing permissions and 1490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * limitations under the License 1590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts; 1790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 1881567f4a0f7c9c338506bd82f4d33e83c2ccf159Makoto Onukiimport com.android.providers.contacts.util.Hex; 19d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onukiimport com.google.common.annotations.VisibleForTesting; 2081567f4a0f7c9c338506bd82f4d33e83c2ccf159Makoto Onuki 21168d5437461d59535cda2b9ccf1ce9a8a5bc8688Elliott Hughesimport java.text.CollationKey; 2238210445730ee04c351c7cc1b3800cfe23e34325Makoto Onukiimport java.text.Collator; 23d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughesimport java.text.RuleBasedCollator; 2438210445730ee04c351c7cc1b3800cfe23e34325Makoto Onukiimport java.util.Locale; 2590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 2690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/** 2790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Converts a name to a normalized form by removing all non-letter characters and normalizing 2890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * UNICODE according to http://unicode.org/unicode/reports/tr15 2990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 3090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikovpublic class NameNormalizer { 3190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 32d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki private static final Object sCollatorLock = new Object(); 33d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki 34d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki private static Locale sCollatorLocale; 35d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki 36d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki private static RuleBasedCollator sCachedCompressingCollator; 37d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki private static RuleBasedCollator sCachedComplexityCollator; 38d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki 39d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki /** 40d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki * Ensure that the cached collators are for the current locale. 41d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki */ 42d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki private static void ensureCollators() { 43d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki final Locale locale = Locale.getDefault(); 44d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki if (locale.equals(sCollatorLocale)) { 45d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki return; 46d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki } 47d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki sCollatorLocale = locale; 48d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki 49d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki sCachedCompressingCollator = (RuleBasedCollator) Collator.getInstance(locale); 50d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki sCachedCompressingCollator.setStrength(Collator.PRIMARY); 51d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki sCachedCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 52d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki 53d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki sCachedComplexityCollator = (RuleBasedCollator) Collator.getInstance(locale); 54d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki sCachedComplexityCollator.setStrength(Collator.SECONDARY); 55d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki } 56d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki 57d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki @VisibleForTesting 58d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki static RuleBasedCollator getCompressingCollator() { 59d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki synchronized (sCollatorLock) { 60d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki ensureCollators(); 61d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki return sCachedCompressingCollator; 62d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki } 6390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 6490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 65d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki @VisibleForTesting 66d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki static RuleBasedCollator getComplexityCollator() { 67d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki synchronized (sCollatorLock) { 68d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki ensureCollators(); 69d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki return sCachedComplexityCollator; 70d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki } 7190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 7290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 7390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov /** 7490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Converts the supplied name to a string that can be used to perform approximate matching 75d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes * of names. It ignores non-letter, non-digit characters, and removes accents. 7690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 7790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov public static String normalize(String name) { 78d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki CollationKey key = getCompressingCollator().getCollationKey(lettersAndDigitsOnly(name)); 79633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes return Hex.encodeHex(key.toByteArray(), true); 8090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 8190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 8290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov /** 8390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Compares "complexity" of two names, which is determined by the presence 8490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * of mixed case characters, accents and, if all else is equal, length. 8590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 8690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov public static int compareComplexity(String name1, String name2) { 87d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes String clean1 = lettersAndDigitsOnly(name1); 88d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes String clean2 = lettersAndDigitsOnly(name2); 89d2f6ad6d50b5570327f8cca3b2d2bdcaec36ea90Makoto Onuki int diff = getComplexityCollator().compare(clean1, clean2); 90d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes if (diff != 0) { 91d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes return diff; 92d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes } 93d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes // compareTo sorts uppercase first. We know that there are no non-case 94d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes // differences from the above test, so we can negate here to get the 95d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes // lowercase-first comparison we really want... 96d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes diff = -clean1.compareTo(clean2); 9790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov if (diff != 0) { 9890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return diff; 9990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 10090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return name1.length() - name2.length(); 10190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 10290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 10390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov /** 104d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes * Returns a string containing just the letters and digits from the original string. 1053ef26b5979ca194cc0a528825794be0386508ccfJay Shrauner * Returns empty string if the original string is null. 10690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 107f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov private static String lettersAndDigitsOnly(String name) { 1083ef26b5979ca194cc0a528825794be0386508ccfJay Shrauner if (name == null) { 1093ef26b5979ca194cc0a528825794be0386508ccfJay Shrauner return ""; 1103ef26b5979ca194cc0a528825794be0386508ccfJay Shrauner } 11190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov char[] letters = name.toCharArray(); 11290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov int length = 0; 11390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov for (int i = 0; i < letters.length; i++) { 11490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov final char c = letters[i]; 115f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov if (Character.isLetterOrDigit(c)) { 11690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov letters[length++] = c; 11790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 11890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 11990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 12090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov if (length != letters.length) { 12190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return new String(letters, 0, length); 12290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 12390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 12490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return name; 12590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 12690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov} 127