// NameSplitter.java revision c19e02a37399c55b852d6570f73553e859b0139a
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */
package com.android.providers.contacts;

import java.util.HashSet;
import java.util.Locale;
import java.util.StringTokenizer;

/**
 * The purpose of this class is to split a full name into given names and last
 * name. The logic only supports having a single last name. If the full name has
 * multiple last names the output will be incorrect.
/**
 * Splits a full name into its structured components: prefix, given names,
 * middle name, family name and suffix. Only a single family name is supported;
 * a full name with several family names is split incorrectly.
 * <p>
 * Core algorithm:
 * <ol>
 * <li>Remove the prefix (Mr., Pastor, Reverend, Sir).</li>
 * <li>Remove the suffix (III, Ph.D., M.D.), unless the name consists of at most
 * two tokens.</li>
 * <li>Assign the last remaining token as the family name.</li>
 * <li>If the word before the family name is a known last name prefix, make it
 * part of the family name.</li>
 * <li>Assign the last remaining word as the middle name, unless it follows a
 * conjunction.</li>
 * <li>Assign the rest of the words as the given names.</li>
 * </ol>
 * <p>
 * All dictionary lookups are case-insensitive and do not depend on the default
 * locale.
 */
public class NameSplitter {

    private final HashSet<String> mPrefixesSet;
    private final HashSet<String> mSuffixesSet;
    /** Length of the longest configured suffix; longer candidates are never looked up. */
    private final int mMaxSuffixLength;
    private final HashSet<String> mLastNamePrefixesSet;
    private final HashSet<String> mConjunctions;

    /**
     * Holder for the components of a split name. A component that was not found
     * in the full name is {@code null}.
     */
    public static class Name {
        private String prefix;
        private String givenNames;
        private String middleName;
        private String familyName;
        private String suffix;

        public String getPrefix() {
            return prefix;
        }

        public String getGivenNames() {
            return givenNames;
        }

        public String getMiddleName() {
            return middleName;
        }

        public String getFamilyName() {
            return familyName;
        }

        public String getSuffix() {
            return suffix;
        }
    }

    /**
     * Breaks a full name into at most {@link #MAX_TOKENS} words and remembers
     * which of them were followed by a dot. The window
     * {@code [mStartPointer, mEndPointer)} marks the words that have not been
     * assigned to a name component yet.
     */
    private static class NameTokenizer {
        private static final int MAX_TOKENS = 10;
        private static final String DELIMITERS = " .,";

        private final String[] mTokens = new String[MAX_TOKENS];
        /** Bit {@code i} is set if token {@code i} was followed by a dot. */
        private int mDotBitmask;
        private int mStartPointer;
        private int mEndPointer;

        public NameTokenizer(String fullName) {
            // Delimiters are returned as one-character tokens so that dots can be tracked.
            final StringTokenizer tokenizer = new StringTokenizer(fullName, DELIMITERS, true);

            while (tokenizer.hasMoreTokens() && mEndPointer < MAX_TOKENS) {
                final String token = tokenizer.nextToken();
                final char first = token.charAt(0);
                if (first == ' ' || first == ',') {
                    continue;
                }

                if (first == '.') {
                    // A dot marks the preceding word; a dot with no preceding
                    // word carries no information and is dropped.
                    if (mEndPointer > 0) {
                        mDotBitmask |= (1 << (mEndPointer - 1));
                    }
                    continue;
                }

                mTokens[mEndPointer] = token;
                mEndPointer++;
            }
        }

        /**
         * Returns true if the token is followed by a dot in the original full name.
         */
        public boolean hasDot(int index) {
            return (mDotBitmask & (1 << index)) != 0;
        }
    }

    /**
     * Constructor.
     *
     * @param commonPrefixes comma-separated list of common prefixes,
     *            e.g. "Mr, Ms, Mrs"
     * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
     *            e.g. "d', st, st., von"
     * @param commonSuffixes comma-separated list of common suffixes,
     *            e.g. "Jr, M.D., MD, D.D.S."
     * @param commonConjunctions comma-separated list of common conjunctions,
     *            e.g. "AND, Or"
     */
    public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
            String commonSuffixes, String commonConjunctions) {
        mPrefixesSet = convertToSet(commonPrefixes);
        mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
        mSuffixesSet = convertToSet(commonSuffixes);
        mConjunctions = convertToSet(commonConjunctions);

        int maxLength = 0;
        for (String suffix : mSuffixesSet) {
            if (suffix.length() > maxLength) {
                maxLength = suffix.length();
            }
        }

        mMaxSuffixLength = maxLength;
    }

    /**
     * Upper-cases a word for dictionary lookups. The root locale is used so that
     * the result does not depend on the default locale (for instance, under a
     * Turkish default locale "i" would otherwise not map to "I").
     */
    private static String normalize(String word) {
        return word.toUpperCase(Locale.ROOT);
    }

    /**
     * Converts a comma-separated list of Strings to a set of Strings. Trims strings
     * and converts them to upper case. Blank entries are skipped. A null list
     * yields an empty set.
     */
    private static HashSet<String> convertToSet(String strings) {
        HashSet<String> set = new HashSet<String>();
        if (strings != null) {
            String[] split = strings.split(",");
            for (int i = 0; i < split.length; i++) {
                String entry = split[i].trim();
                if (entry.length() > 0) {
                    set.add(normalize(entry));
                }
            }
        }
        return set;
    }

    /**
     * Parses a full name and returns parsed components in the Name object.
     * Components left over from an earlier call on the same Name object are
     * cleared first.
     *
     * @param name receives the parsed components; left untouched if
     *            {@code fullName} is null
     * @param fullName the name to split, may be null
     */
    public void split(Name name, String fullName) {
        if (fullName == null) {
            return;
        }

        // Drop whatever a previous split stored in this object.
        name.prefix = null;
        name.givenNames = null;
        name.middleName = null;
        name.familyName = null;
        name.suffix = null;

        NameTokenizer tokens = new NameTokenizer(fullName);
        parsePrefix(name, tokens);

        // If the name consists of just one or two tokens, treat them as first/last name,
        // not as suffix. Example: John Ma; Ma is last name, not "M.A.".
        if (tokens.mEndPointer > 2) {
            parseSuffix(name, tokens);
        }

        parseLastName(name, tokens);
        parseMiddleName(name, tokens);
        parseGivenNames(name, tokens);
    }

    /**
     * Parses the first word from the name if it is a prefix.
     */
    private void parsePrefix(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        String firstToken = tokens.mTokens[tokens.mStartPointer];
        if (mPrefixesSet.contains(normalize(firstToken))) {
            name.prefix = firstToken;
            tokens.mStartPointer++;
        }
    }

    /**
     * Parses the last word(s) from the name if it is a suffix.
     */
    private void parseSuffix(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        final int lastIndex = tokens.mEndPointer - 1;
        String suffix = tokens.mTokens[lastIndex];
        if (suffix.length() > mMaxSuffixLength) {
            return;
        }

        // Simple case: the last word alone is a known suffix, e.g. "Jr".
        String normalized = normalize(suffix);
        if (mSuffixesSet.contains(normalized)) {
            name.suffix = suffix;
            tokens.mEndPointer--;
            return;
        }

        // The reported suffix keeps a dot only where the full name had one; the
        // lookup key always gets one so that "M D" is matched against "M.D.".
        if (tokens.hasDot(lastIndex)) {
            suffix += '.';
        }
        normalized += ".";

        // Take care of suffixes like M.D. and D.D.S. by prepending earlier
        // words until a match is found or the candidate becomes too long.
        int pos = lastIndex;
        while (normalized.length() <= mMaxSuffixLength) {
            if (mSuffixesSet.contains(normalized)) {
                name.suffix = suffix;
                tokens.mEndPointer = pos;
                return;
            }

            if (pos == tokens.mStartPointer) {
                break;
            }

            pos--;
            final String previous = tokens.mTokens[pos];
            suffix = previous + (tokens.hasDot(pos) ? "." : " ") + suffix;
            normalized = normalize(previous) + "." + normalized;
        }
    }

    /**
     * Takes the last remaining word as the family name, together with the word
     * before it if that word is a known last name prefix.
     */
    private void parseLastName(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        name.familyName = tokens.mTokens[tokens.mEndPointer - 1];
        tokens.mEndPointer--;

        // Take care of last names like "D'Onofrio" and "von Cliburn"
        if ((tokens.mEndPointer - tokens.mStartPointer) > 0) {
            String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1];
            final String normalized = normalize(lastNamePrefix);
            if (mLastNamePrefixesSet.contains(normalized)
                    || mLastNamePrefixesSet.contains(normalized + ".")) {
                if (tokens.hasDot(tokens.mEndPointer - 1)) {
                    lastNamePrefix += '.';
                }
                name.familyName = lastNamePrefix + " " + name.familyName;
                tokens.mEndPointer--;
            }
        }
    }

    /**
     * Takes the last remaining word as the middle name if at least two words
     * remain. With three or more words remaining, the word is left for the given
     * names when it is preceded by a conjunction.
     */
    private void parseMiddleName(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        final int remaining = tokens.mEndPointer - tokens.mStartPointer;
        if (remaining > 1) {
            if (remaining == 2
                    || !mConjunctions.contains(normalize(tokens.mTokens[tokens.mEndPointer - 2]))) {
                name.middleName = tokens.mTokens[tokens.mEndPointer - 1];
                tokens.mEndPointer--;
            }
        }
    }

    /**
     * Assigns all remaining words as the given names. Several words are joined
     * with spaces and keep the dots they had in the full name; a single word is
     * stored without its dot.
     */
    private void parseGivenNames(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        if ((tokens.mEndPointer - tokens.mStartPointer) == 1) {
            name.givenNames = tokens.mTokens[tokens.mStartPointer];
        } else {
            StringBuilder sb = new StringBuilder();
            for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) {
                if (i != tokens.mStartPointer) {
                    sb.append(' ');
                }
                sb.append(tokens.mTokens[i]);
                if (tokens.hasDot(i)) {
                    sb.append('.');
                }
            }
            name.givenNames = sb.toString();
        }
    }
}