1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16package com.android.providers.contacts; 17 18import java.util.Locale; 19import java.text.Collator; 20import java.text.CollationKey; 21import java.text.RuleBasedCollator; 22 23/** 24 * Converts a name to a normalized form by removing all non-letter characters and normalizing 25 * UNICODE according to http://unicode.org/unicode/reports/tr15 26 */ 27public class NameNormalizer { 28 29 private static final RuleBasedCollator sCompressingCollator; 30 static { 31 sCompressingCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault()); 32 sCompressingCollator.setStrength(Collator.PRIMARY); 33 sCompressingCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 34 } 35 36 private static final RuleBasedCollator sComplexityCollator; 37 static { 38 sComplexityCollator = (RuleBasedCollator)Collator.getInstance(Locale.getDefault()); 39 sComplexityCollator.setStrength(Collator.SECONDARY); 40 } 41 42 /** 43 * Converts the supplied name to a string that can be used to perform approximate matching 44 * of names. It ignores non-letter, non-digit characters, and removes accents. 45 */ 46 public static String normalize(String name) { 47 CollationKey key = sCompressingCollator.getCollationKey(lettersAndDigitsOnly(name)); 48 return Hex.encodeHex(key.toByteArray(), true); 49 } 50 51 /** 52 * Compares "complexity" of two names, which is determined by the presence 53 * of mixed case characters, accents and, if all else is equal, length. 54 */ 55 public static int compareComplexity(String name1, String name2) { 56 String clean1 = lettersAndDigitsOnly(name1); 57 String clean2 = lettersAndDigitsOnly(name2); 58 int diff = sComplexityCollator.compare(clean1, clean2); 59 if (diff != 0) { 60 return diff; 61 } 62 // compareTo sorts uppercase first. We know that there are no non-case 63 // differences from the above test, so we can negate here to get the 64 // lowercase-first comparison we really want... 65 diff = -clean1.compareTo(clean2); 66 if (diff != 0) { 67 return diff; 68 } 69 return name1.length() - name2.length(); 70 } 71 72 /** 73 * Returns a string containing just the letters and digits from the original string. 74 */ 75 private static String lettersAndDigitsOnly(String name) { 76 char[] letters = name.toCharArray(); 77 int length = 0; 78 for (int i = 0; i < letters.length; i++) { 79 final char c = letters[i]; 80 if (Character.isLetterOrDigit(c)) { 81 letters[length++] = c; 82 } 83 } 84 85 if (length != letters.length) { 86 return new String(letters, 0, length); 87 } 88 89 return name; 90 } 91} 92