190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/* 290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Copyright (C) 2009 The Android Open Source Project 390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * 490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License"); 590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * you may not use this file except in compliance with the License. 690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * You may obtain a copy of the License at 790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * 890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * http://www.apache.org/licenses/LICENSE-2.0 990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * 1090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Unless required by applicable law or agreed to in writing, software 1190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS, 1290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * See the License for the specific language governing permissions and 1490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * limitations under the License 1590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts; 1790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 18633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughesimport java.util.Locale; 19d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughesimport java.text.Collator; 20168d5437461d59535cda2b9ccf1ce9a8a5bc8688Elliott Hughesimport java.text.CollationKey; 21d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughesimport java.text.RuleBasedCollator; 2290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 2390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/** 2490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Converts a name to a normalized form by removing all non-letter characters and normalizing 2590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * UNICODE according to http://unicode.org/unicode/reports/tr15 2690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 2790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikovpublic class NameNormalizer { 2890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 2990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov private static final RuleBasedCollator sCompressingCollator; 3090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov static { 31633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes sCompressingCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault()); 3290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov sCompressingCollator.setStrength(Collator.PRIMARY); 3390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov sCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 3490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 3590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 3690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov private static final RuleBasedCollator sComplexityCollator; 3790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov static { 38633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes sComplexityCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault()); 39d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes sComplexityCollator.setStrength(Collator.SECONDARY); 4090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 4190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 4290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov /** 4390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Converts the supplied name to a string that can be used to perform approximate matching 44d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes * of names. It ignores non-letter, non-digit characters, and removes accents. 4590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 4690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov public static String normalize(String name) { 47633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes CollationKey key = sCompressingCollator.getCollationKey(lettersAndDigitsOnly(name)); 48633d446a21eaceecc0948893769f82163f1cb1e5Elliott Hughes return Hex.encodeHex(key.toByteArray(), true); 4990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 5090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 5190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov /** 5290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Compares "complexity" of two names, which is determined by the presence 5390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * of mixed case characters, accents and, if all else is equal, length. 5490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 5590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov public static int compareComplexity(String name1, String name2) { 56d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes String clean1 = lettersAndDigitsOnly(name1); 57d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes String clean2 = lettersAndDigitsOnly(name2); 58d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes int diff = sComplexityCollator.compare(clean1, clean2); 59d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes if (diff != 0) { 60d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes return diff; 61d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes } 62d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes // compareTo sorts uppercase first. We know that there are no non-case 63d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes // differences from the above test, so we can negate here to get the 64d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes // lowercase-first comparison we really want... 65d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes diff = -clean1.compareTo(clean2); 6690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov if (diff != 0) { 6790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return diff; 6890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 6990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return name1.length() - name2.length(); 7090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 7190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 7290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov /** 73d8272f91a5354b855f5d99db9e1ca4478c93479dElliott Hughes * Returns a string containing just the letters and digits from the original string. 7490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 75f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov private static String lettersAndDigitsOnly(String name) { 7690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov char[] letters = name.toCharArray(); 7790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov int length = 0; 7890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov for (int i = 0; i < letters.length; i++) { 7990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov final char c = letters[i]; 80f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov if (Character.isLetterOrDigit(c)) { 8190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov letters[length++] = c; 8290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 8390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 8490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 8590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov if (length != letters.length) { 8690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return new String(letters, 0, length); 8790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 8890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 8990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return name; 9090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 9190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov} 92