NameNormalizer.java revision 81567f4a0f7c9c338506bd82f4d33e83c2ccf159
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16package com.android.providers.contacts; 17 18import com.android.providers.contacts.util.Hex; 19 20import java.util.Locale; 21import java.text.Collator; 22import java.text.CollationKey; 23import java.text.RuleBasedCollator; 24 25/** 26 * Converts a name to a normalized form by removing all non-letter characters and normalizing 27 * UNICODE according to http://unicode.org/unicode/reports/tr15 28 */ 29public class NameNormalizer { 30 31 private static final RuleBasedCollator sCompressingCollator; 32 static { 33 sCompressingCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault()); 34 sCompressingCollator.setStrength(Collator.PRIMARY); 35 sCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 36 } 37 38 private static final RuleBasedCollator sComplexityCollator; 39 static { 40 sComplexityCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault()); 41 sComplexityCollator.setStrength(Collator.SECONDARY); 42 } 43 44 /** 45 * Converts the supplied name to a string that can be used to perform approximate matching 46 * of names. It ignores non-letter, non-digit characters, and removes accents. 47 */ 48 public static String normalize(String name) { 49 CollationKey key = sCompressingCollator.getCollationKey(lettersAndDigitsOnly(name)); 50 return Hex.encodeHex(key.toByteArray(), true); 51 } 52 53 /** 54 * Compares "complexity" of two names, which is determined by the presence 55 * of mixed case characters, accents and, if all else is equal, length. 56 */ 57 public static int compareComplexity(String name1, String name2) { 58 String clean1 = lettersAndDigitsOnly(name1); 59 String clean2 = lettersAndDigitsOnly(name2); 60 int diff = sComplexityCollator.compare(clean1, clean2); 61 if (diff != 0) { 62 return diff; 63 } 64 // compareTo sorts uppercase first. We know that there are no non-case 65 // differences from the above test, so we can negate here to get the 66 // lowercase-first comparison we really want... 67 diff = -clean1.compareTo(clean2); 68 if (diff != 0) { 69 return diff; 70 } 71 return name1.length() - name2.length(); 72 } 73 74 /** 75 * Returns a string containing just the letters and digits from the original string. 76 */ 77 private static String lettersAndDigitsOnly(String name) { 78 char[] letters = name.toCharArray(); 79 int length = 0; 80 for (int i = 0; i < letters.length; i++) { 81 final char c = letters[i]; 82 if (Character.isLetterOrDigit(c)) { 83 letters[length++] = c; 84 } 85 } 86 87 if (length != letters.length) { 88 return new String(letters, 0, length); 89 } 90 91 return name; 92 } 93} 94