1069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project/* 2069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Copyright 2001-2004 The Apache Software Foundation. 3069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 4069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License"); 5069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * you may not use this file except in compliance with the License. 6069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * You may obtain a copy of the License at 7069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 8069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * http://www.apache.org/licenses/LICENSE-2.0 9069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 10069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Unless required by applicable law or agreed to in writing, software 11069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS, 12069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * See the License for the specific language governing permissions and 14069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * limitations under the License. 15069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 16069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 17069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Projectpackage org.apache.commons.codec.language; 18069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 19069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Projectimport org.apache.commons.codec.EncoderException; 20069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Projectimport org.apache.commons.codec.StringEncoder; 21069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 22069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project/** 23069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Utility methods for {@link Soundex} and {@link RefinedSoundex} classes. 24069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 25069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @author Apache Software Foundation 26069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @version $Id: SoundexUtils.java,v 1.5 2004/03/17 18:31:35 ggregory Exp $ 27069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @since 1.3 28069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 29069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Projectfinal class SoundexUtils { 30069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 31069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 32069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Cleans up the input string before Soundex processing by only returning 33069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * upper case letters. 34069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 35069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param str 36069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * The String to clean. 37069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @return A clean String. 38069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 39069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project static String clean(String str) { 40069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (str == null || str.length() == 0) { 41069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return str; 42069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 43069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project int len = str.length(); 44069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project char[] chars = new char[len]; 45069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project int count = 0; 46069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project for (int i = 0; i < len; i++) { 47069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (Character.isLetter(str.charAt(i))) { 48069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project chars[count++] = str.charAt(i); 49069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 50069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 51069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (count == len) { 52069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return str.toUpperCase(); 53069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 54069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return new String(chars, 0, count).toUpperCase(); 55069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 56069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 57069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 58069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Encodes the Strings and returns the number of characters in the two 59069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * encoded Strings that are the same. 60069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * <ul> 61069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates 62069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * little or no similarity, and 4 indicates strong similarity or identical 63069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * values.</li> 64069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * <li>For refined Soundex, the return value can be greater than 4.</li> 65069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * </ul> 66069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 67069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param encoder 68069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * The encoder to use to encode the Strings. 69069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param s1 70069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * A String that will be encoded and compared. 71069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param s2 72069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * A String that will be encoded and compared. 73069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @return The number of characters in the two Soundex encoded Strings that 74069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * are the same. 75069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 76069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @see #differenceEncoded(String,String) 77069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> 78069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * MS T-SQL DIFFERENCE</a> 79069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 80069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @throws EncoderException 81069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * if an error occurs encoding one of the strings 82069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 83069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException { 84069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return differenceEncoded(encoder.encode(s1), encoder.encode(s2)); 85069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 86069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 87069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 88069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Returns the number of characters in the two Soundex encoded Strings that 89069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * are the same. 90069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * <ul> 91069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates 92069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * little or no similarity, and 4 indicates strong similarity or identical 93069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * values.</li> 94069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * <li>For refined Soundex, the return value can be greater than 4.</li> 95069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * </ul> 96069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 97069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param es1 98069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * An encoded String. 99069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param es2 100069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * An encoded String. 101069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @return The number of characters in the two Soundex encoded Strings that 102069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * are the same. 103069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 104069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> 105069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * MS T-SQL DIFFERENCE</a> 106069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 107069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project static int differenceEncoded(String es1, String es2) { 108069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 109069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (es1 == null || es2 == null) { 110069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return 0; 111069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 112069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project int lengthToMatch = Math.min(es1.length(), es2.length()); 113069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project int diff = 0; 114069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project for (int i = 0; i < lengthToMatch; i++) { 115069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (es1.charAt(i) == es2.charAt(i)) { 116069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project diff++; 117069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 118069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 119069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return diff; 120069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 121069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 122069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project} 123