190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/* 290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Copyright (C) 2009 The Android Open Source Project 390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * 490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License"); 590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * you may not use this file except in compliance with the License. 690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * You may obtain a copy of the License at 790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * 890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * http://www.apache.org/licenses/LICENSE-2.0 990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * 1090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Unless required by applicable law or agreed to in writing, software 1190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS, 1290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * See the License for the specific language governing permissions and 1490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * limitations under the License 1590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts; 1790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 1890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikovimport com.ibm.icu4jni.text.CollationAttribute; 1902123b7cc8336ec5481f20c12ce5c7f2b9f044d4Brian Carlstromimport com.ibm.icu4jni.text.CollationKey; // TODO: java.text.CollationKey post-froyo 2090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikovimport com.ibm.icu4jni.text.Collator; 2190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikovimport com.ibm.icu4jni.text.RuleBasedCollator; 2202123b7cc8336ec5481f20c12ce5c7f2b9f044d4Brian Carlstromimport java.util.Locale; 2390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 2490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov/** 2590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Converts a name to a normalized form by removing all non-letter characters and normalizing 2690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * UNICODE according to http://unicode.org/unicode/reports/tr15 2790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 2890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikovpublic class NameNormalizer { 2990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 3090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov private static final RuleBasedCollator sCompressingCollator; 3190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov static { 3202123b7cc8336ec5481f20c12ce5c7f2b9f044d4Brian Carlstrom sCompressingCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault()); 3390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov sCompressingCollator.setStrength(Collator.PRIMARY); 3490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov sCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 3590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 3690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 3790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov private static final RuleBasedCollator sComplexityCollator; 3890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov static { 3902123b7cc8336ec5481f20c12ce5c7f2b9f044d4Brian Carlstrom sComplexityCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault()); 4090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov sComplexityCollator.setStrength(Collator.TERTIARY); 4190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov sComplexityCollator.setAttribute(CollationAttribute.CASE_FIRST, 4290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov CollationAttribute.VALUE_LOWER_FIRST); 4390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 4490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 4590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov /** 4690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Converts the supplied name to a string that can be used to perform approximate matching 4790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * of names. It ignores non-letter characters and removes accents. 4890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 4990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov public static String normalize(String name) { 5002123b7cc8336ec5481f20c12ce5c7f2b9f044d4Brian Carlstrom CollationKey key = sCompressingCollator.getCollationKey(lettersAndDigitsOnly(name)); 5102123b7cc8336ec5481f20c12ce5c7f2b9f044d4Brian Carlstrom return Hex.encodeHex(key.toByteArray(), true); 5290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 5390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 5490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov /** 5590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Compares "complexity" of two names, which is determined by the presence 5690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * of mixed case characters, accents and, if all else is equal, length. 5790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 5890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov public static int compareComplexity(String name1, String name2) { 59f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov int diff = sComplexityCollator.compare(lettersAndDigitsOnly(name1), 60f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov lettersAndDigitsOnly(name2)); 6190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov if (diff != 0) { 6290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return diff; 6390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 6490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 6590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return name1.length() - name2.length(); 6690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 6790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 6890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov /** 6990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov * Returns a string containing just the letters from the original string. 7090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov */ 71f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov private static String lettersAndDigitsOnly(String name) { 7290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov char[] letters = name.toCharArray(); 7390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov int length = 0; 7490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov for (int i = 0; i < letters.length; i++) { 7590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov final char c = letters[i]; 76f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov if (Character.isLetterOrDigit(c)) { 7790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov letters[length++] = c; 7890dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 7990dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 8090dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 8190dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov if (length != letters.length) { 8290dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return new String(letters, 0, length); 8390dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 8490dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov 8590dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov return name; 8690dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov } 8790dfb8e292caac95c84767aeea6069fad0052373Dmitri Plotnikov} 88