1069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project/* 2069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Copyright 2001-2004 The Apache Software Foundation. 3069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 4069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License"); 5069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * you may not use this file except in compliance with the License. 6069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * You may obtain a copy of the License at 7069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 8069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * http://www.apache.org/licenses/LICENSE-2.0 9069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 10069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Unless required by applicable law or agreed to in writing, software 11069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS, 12069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * See the License for the specific language governing permissions and 14069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * limitations under the License. 15069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 16069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 17069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Projectpackage org.apache.commons.codec.language; 18069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 19069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Projectimport org.apache.commons.codec.EncoderException; 20069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Projectimport org.apache.commons.codec.StringEncoder; 21069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 22069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project/** 23069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Encodes a string into a Refined Soundex value. A refined soundex code is 24069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * optimized for spell checking words. Soundex method originally developed by 25069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>. 26069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 27069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @author Apache Software Foundation 28069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @version $Id: RefinedSoundex.java,v 1.21 2004/06/05 18:32:04 ggregory Exp $ 29069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 30069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Projectpublic class RefinedSoundex implements StringEncoder { 31069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 32069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 33069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * This static variable contains an instance of the RefinedSoundex using 34069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * the US_ENGLISH mapping. 35069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 36069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project public static final RefinedSoundex US_ENGLISH = new RefinedSoundex(); 37069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 38069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 39069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * RefinedSoundex is *refined* for a number of reasons one being that the 40069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * mappings have been altered. This implementation contains default 41069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * mappings for US English. 42069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 43069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project public static final char[] US_ENGLISH_MAPPING = "01360240043788015936020505".toCharArray(); 44069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 45069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 46069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Every letter of the alphabet is "mapped" to a numerical value. This char 47069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * array holds the values to which each letter is mapped. This 48069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * implementation contains a default map for US_ENGLISH 49069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 50069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project private char[] soundexMapping; 51069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 52069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 53069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Creates an instance of the RefinedSoundex object using the default US 54069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * English mapping. 55069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 56069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project public RefinedSoundex() { 57069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project this(US_ENGLISH_MAPPING); 58069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 59069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 60069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 61069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Creates a refined soundex instance using a custom mapping. This 62069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * constructor can be used to customize the mapping, and/or possibly 63069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * provide an internationalized mapping for a non-Western character set. 64069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 65069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param mapping 66069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Mapping array to use when finding the corresponding code for 67069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * a given character 68069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 69069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project public RefinedSoundex(char[] mapping) { 70069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project this.soundexMapping = mapping; 71069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 72069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 73069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project // BEGIN android-note 74069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project // Removed @see reference to SoundexUtils below, since the class isn't 75069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project // public. 76069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project // END android-note 77069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 78069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Returns the number of characters in the two encoded Strings that are the 79069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * same. This return value ranges from 0 to the length of the shortest 80069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * encoded String: 0 indicates little or no similarity, and 4 out of 4 (for 81069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * example) indicates strong similarity or identical values. For refined 82069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Soundex, the return value can be greater than 4. 83069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 84069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param s1 85069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * A String that will be encoded and compared. 86069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param s2 87069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * A String that will be encoded and compared. 88069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @return The number of characters in the two encoded Strings that are the 89069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * same from 0 to to the length of the shortest encoded String. 90069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 91069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> 92069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * MS T-SQL DIFFERENCE</a> 93069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 94069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @throws EncoderException 95069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * if an error occurs encoding one of the strings 96069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @since 1.3 97069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 98069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project public int difference(String s1, String s2) throws EncoderException { 99069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return SoundexUtils.difference(this, s1, s2); 100069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 101069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 102069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 103069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Encodes an Object using the refined soundex algorithm. This method is 104069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * provided in order to satisfy the requirements of the Encoder interface, 105069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * and will throw an EncoderException if the supplied object is not of type 106069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * java.lang.String. 107069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 108069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param pObject 109069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Object to encode 110069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @return An object (or type java.lang.String) containing the refined 111069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * soundex code which corresponds to the String supplied. 112069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @throws EncoderException 113069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * if the parameter supplied is not of type java.lang.String 114069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 115069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project public Object encode(Object pObject) throws EncoderException { 116069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (!(pObject instanceof java.lang.String)) { 117069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String"); 118069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 119069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return soundex((String) pObject); 120069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 121069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 122069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 123069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Encodes a String using the refined soundex algorithm. 124069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 125069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param pString 126069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * A String object to encode 127069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @return A Soundex code corresponding to the String supplied 128069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 129069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project public String encode(String pString) { 130069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return soundex(pString); 131069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 132069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 133069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 134069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Returns the mapping code for a given character. The mapping codes are 135069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * maintained in an internal char array named soundexMapping, and the 136069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * default values of these mappings are US English. 137069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 138069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param c 139069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * char to get mapping for 140069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @return A character (really a numeral) to return for the given char 141069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 142069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project char getMappingCode(char c) { 143069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (!Character.isLetter(c)) { 144069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return 0; 145069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 146069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return this.soundexMapping[Character.toUpperCase(c) - 'A']; 147069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 148069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 149069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project /** 150069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * Retreives the Refined Soundex code for a given String object. 151069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * 152069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @param str 153069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * String to encode using the Refined Soundex algorithm 154069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project * @return A soundex code for the String supplied 155069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project */ 156069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project public String soundex(String str) { 157069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (str == null) { 158069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return null; 159069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 160069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project str = SoundexUtils.clean(str); 161069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (str.length() == 0) { 162069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return str; 163069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 164069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 165069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project StringBuffer sBuf = new StringBuffer(); 166069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project sBuf.append(str.charAt(0)); 167069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 168069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project char last, current; 169069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project last = '*'; 170069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 171069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project for (int i = 0; i < str.length(); i++) { 172069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 173069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project current = getMappingCode(str.charAt(i)); 174069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project if (current == last) { 175069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project continue; 176069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } else if (current != 0) { 177069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project sBuf.append(current); 178069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 179069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 180069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project last = current; 181069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 182069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 183069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project 184069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project return sBuf.toString(); 185069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project } 186069490a5ca2fd1988d29daf45d892f47ad665115The Android Open Source Project} 187