NameSplitter.java revision 635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2
14097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov/* 24097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Copyright (C) 2009 The Android Open Source Project 34097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * 44097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License"); 54097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * you may not use this file except in compliance with the License. 64097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * You may obtain a copy of the License at 74097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * 84097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * http://www.apache.org/licenses/LICENSE-2.0 94097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * 104097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Unless required by applicable law or agreed to in writing, software 114097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS, 124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * See the License for the specific language governing permissions and 144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * limitations under the License 154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts; 174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 18622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkeyimport android.content.ContentValues; 19635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport android.database.DatabaseUtils; 20635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport android.provider.ContactsContract.FullNameStyle; 21635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport android.provider.ContactsContract.PhoneticNameStyle; 22f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikovimport android.provider.ContactsContract.CommonDataKinds.StructuredName; 23f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikovimport android.text.TextUtils; 24622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey 25635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport java.lang.Character.UnicodeBlock; 264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovimport java.util.HashSet; 27622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkeyimport java.util.Locale; 284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovimport java.util.StringTokenizer; 294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov/** 314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * The purpose of this class is to split a full name into given names and last 324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * name. The logic only supports having a single last name. If the full name has 334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * multiple last names the output will be incorrect. 
344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <p> 354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Core algorithm: 364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <ol> 374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Remove the suffixes (III, Ph.D., M.D.).</li> 384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li> 394097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Assign the last remaining token as the last name.</li> 404097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>If the previous word to the last name is one from LASTNAME_PREFIXES, use 414097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * this word also as the last name.</li> 424097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Assign the rest of the words as the "given names".</li> 434097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * </ol> 444097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 454097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovpublic class NameSplitter { 464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 47f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov public static final int MAX_TOKENS = 10; 48f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov 49635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase(); 50635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase(); 51635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 52635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // This includes simplified and traditional Chinese 53635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase(); 
// Lookup tables built by the constructor; every entry is trimmed and upper-cased.
private final HashSet<String> mPrefixesSet;
private final HashSet<String> mSuffixesSet;
// Length of the longest entry in mSuffixesSet.
private final int mMaxSuffixLength;
private final HashSet<String> mLastNamePrefixesSet;
private final HashSet<String> mConjuctions;
// Locale passed to the constructor, or the default locale when none was given.
private final Locale mLocale;
// Lower-cased language code of mLocale.
private final String mLanguage;

/**
 * Mutable holder for the components of a structured name, the phonetic
 * components, and the style codes of the full and phonetic names.
 */
public static class Name {
    public String prefix;
    public String givenNames;
    public String middleName;
    public String familyName;
    public String suffix;

    public int fullNameStyle;

    public String phoneticFamilyName;
    public String phoneticMiddleName;
    public String phoneticGivenName;

    public int phoneticNameStyle;

    /** Creates a name with no components set. */
    public Name() {
    }

    /**
     * Creates a name from its five basic components. The phonetic fields and
     * both style codes are left at their Java defaults.
     */
    public Name(String prefix, String givenNames, String middleName, String familyName,
            String suffix) {
        this.prefix = prefix;
        this.givenNames = givenNames;
        this.middleName = middleName;
        this.familyName = familyName;
        this.suffix = suffix;
    }

    public String getPrefix() {
        return prefix;
    }

    public String getGivenNames() {
        return givenNames;
    }

    public String getMiddleName() {
        return middleName;
    }

    public String getFamilyName() {
        return familyName;
    }

    public String getSuffix() {
        return suffix;
    }

    /**
     * Overwrites every field of this name from the {@link StructuredName}
     * columns of {@code values}. A missing style column is read as
     * {@code UNDEFINED}.
     */
    public void fromValues(ContentValues values) {
        prefix = values.getAsString(StructuredName.PREFIX);
        givenNames = values.getAsString(StructuredName.GIVEN_NAME);
        middleName = values.getAsString(StructuredName.MIDDLE_NAME);
        familyName = values.getAsString(StructuredName.FAMILY_NAME);
        suffix = values.getAsString(StructuredName.SUFFIX);

        Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
        fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer;

        phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
        phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
        phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);

        integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
        phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer;
    }

    /**
     * Writes this name into {@code values}. String components are written
     * only when they are non-null; the two style codes are always written.
     */
    public void toValues(ContentValues values) {
        putValueIfPresent(values, StructuredName.PREFIX, prefix);
        putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
        putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
        putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
        putValueIfPresent(values, StructuredName.SUFFIX, suffix);
        values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
        putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
        putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
        putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
        values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
    }

    /** Stores {@code value} under {@code name} unless {@code value} is null. */
    private void putValueIfPresent(ContentValues values, String name, String value) {
        if (value != null) {
            values.put(name, value);
        }
    }

    /** Resets all string components to null and both style codes to {@code UNDEFINED}. */
    public void clear() {
        prefix = null;
        givenNames = null;
        middleName = null;
        familyName = null;
        suffix = null;
        fullNameStyle = FullNameStyle.UNDEFINED;
        phoneticFamilyName = null;
        phoneticMiddleName = null;
        phoneticGivenName = null;
        phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
    }
}

/**
 * Tokenizer that splits a full name on spaces, dots and commas. It stores at
 * most {@link #MAX_TOKENS} name tokens and records, per stored token, whether
 * a dot or a comma came after it.
 */
private static class NameTokenizer extends StringTokenizer {
    private final String[] mTokens;
    // Bit i is set when token i is followed by a dot / a comma.
    private int mDotBitmask;
    private int mCommaBitmask;
    // Half-open range [mStartPointer, mEndPointer) of the tokens still to be consumed.
    private int mStartPointer;
    private int mEndPointer;

    public NameTokenizer(String fullName) {
        // Delimiters are returned as tokens so that dots and commas can be observed.
        super(fullName, " .,", true);

        mTokens = new String[MAX_TOKENS];

        // Iterate over tokens, skipping over empty ones and marking tokens that
        // are followed by dots.
        while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
            final String token = nextToken();
            if (token.length() > 0) {
                final char c = token.charAt(0);
                if (c == ' ') {
                    continue;
                }
            }

            // A dot or comma seen before any token has been stored falls through
            // to the last branch and is kept as a token of its own.
            if (mEndPointer > 0 && token.charAt(0) == '.') {
                mDotBitmask |= (1 << (mEndPointer - 1));
            } else if (mEndPointer > 0 && token.charAt(0) == ',') {
                mCommaBitmask |= (1 << (mEndPointer - 1));
            } else {
                mTokens[mEndPointer] = token;
                mEndPointer++;
            }
        }
    }

    /**
     * Returns true if the token is followed by a dot in the original full name.
     */
    public boolean hasDot(int index) {
        return (mDotBitmask & (1 << index)) != 0;
    }

    /**
     * Returns true if the token is followed by a comma in the original full name.
     */
    public boolean hasComma(int index) {
        return (mCommaBitmask & (1 << index)) != 0;
    }
}

/**
 * Constructor.
 *
 * @param commonPrefixes comma-separated list of common prefixes,
 *            e.g. "Mr, Ms, Mrs"
 * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
 *            e.g. "d', st, st., von"
 * @param commonSuffixes comma-separated list of common suffixes,
 *            e.g. "Jr, M.D., MD, D.D.S."
 * @param commonConjunctions comma-separated list of common conjuctions,
 *            e.g. "AND, Or"
 * @param locale locale whose language code is remembered by this splitter;
 *            when null, the default locale is used
 */
public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
        String commonSuffixes, String commonConjunctions, Locale locale) {
    // TODO: refactor this to use <string-array> resources
    mPrefixesSet = convertToSet(commonPrefixes);
    mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
    mSuffixesSet = convertToSet(commonSuffixes);
    mConjuctions = convertToSet(commonConjunctions);
    mLocale = locale != null ? locale : Locale.getDefault();
    mLanguage = mLocale.getLanguage().toLowerCase();

    // Remember the length of the longest known suffix.
    int maxLength = 0;
    for (String suffix : mSuffixesSet) {
        if (suffix.length() > maxLength) {
            maxLength = suffix.length();
        }
    }

    mMaxSuffixLength = maxLength;
}
2454097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 2464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private static HashSet<String> convertToSet(String strings) { 2474097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov HashSet<String> set = new HashSet<String>(); 2484097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (strings != null) { 2494097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov String[] split = strings.split(","); 2504097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov for (int i = 0; i < split.length; i++) { 2514097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov set.add(split[i].trim().toUpperCase()); 2524097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 2534097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 2544097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return set; 2554097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 2564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 2574097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov /** 258f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov * Parses a full name and returns components as a list of tokens. 
259f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov */ 260f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov public int tokenize(String[] tokens, String fullName) { 261f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov if (fullName == null) { 262f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov return 0; 263f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov } 264f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov 265f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov NameTokenizer tokenizer = new NameTokenizer(fullName); 266f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov 267f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov if (tokenizer.mStartPointer == tokenizer.mEndPointer) { 268f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov return 0; 269f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov } 270f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov 271f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov String firstToken = tokenizer.mTokens[tokenizer.mStartPointer]; 272f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov if (mPrefixesSet.contains(firstToken.toUpperCase())) { 273f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov tokenizer.mStartPointer++; 274f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov } 275f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov int count = 0; 276f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) { 277f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov tokens[count++] = tokenizer.mTokens[i]; 278f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov } 279f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov 280f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov return count; 281f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov } 282f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov 
283f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov 284f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov /** 2854097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Parses a full name and returns parsed components in the Name object. 2864097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 2874097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov public void split(Name name, String fullName) { 2884097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (fullName == null) { 2894097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 2904097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 2914097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 292635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int fullNameStyle = guessFullNameStyle(fullName); 293635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (fullNameStyle == FullNameStyle.CJK) { 294635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov fullNameStyle = getAdjustedFullNameStyle(fullNameStyle); 295635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 296635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 297635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov switch (fullNameStyle) { 298635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov case FullNameStyle.CHINESE: 299635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov splitChineseName(name, fullName); 300635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov break; 301635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 302635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov case FullNameStyle.JAPANESE: 303635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov case FullNameStyle.KOREAN: 304635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov splitJapaneseOrKoreanName(name, fullName); 305635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov break; 306635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 
307635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov default: 308635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov splitWesternName(name, fullName); 309635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 310635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 311635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 312635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 313635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Splits a full name composed according to the Western tradition: 314635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * <pre> 315635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * [prefix] given name(s) [[middle name] family name] [, suffix] 316635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * [prefix] family name, given name [middle name] [,suffix] 317635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * </pre> 318635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 319635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private void splitWesternName(Name name, String fullName) { 3204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov NameTokenizer tokens = new NameTokenizer(fullName); 3214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov parsePrefix(name, tokens); 322c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov 323c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov // If the name consists of just one or two tokens, treat them as first/last name, 324c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov // not as suffix. Example: John Ma; Ma is last name, not "M.A.". 
325c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov if (tokens.mEndPointer > 2) { 326c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov parseSuffix(name, tokens); 327c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov } 328c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov 329c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) { 330c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov name.givenNames = tokens.mTokens[tokens.mStartPointer]; 331c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov } else { 332c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov parseLastName(name, tokens); 333c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov parseMiddleName(name, tokens); 334c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov parseGivenNames(name, tokens); 335c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov } 3364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 3374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 3384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov /** 339635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Splits a full name composed according to the Chinese tradition: 340635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * <pre> 341635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * [family name [middle name]] given name 342635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * </pre> 343635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 344635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private void splitChineseName(Name name, String fullName) { 345635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov StringTokenizer tokenizer = new StringTokenizer(fullName); 346635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (tokenizer.hasMoreTokens()) { 347635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String token = tokenizer.nextToken(); 
348635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.givenNames == null) { 349635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 350635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (name.familyName == null) { 351635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = name.givenNames; 352635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 353635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (name.middleName == null) { 354635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName = name.givenNames; 355635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 356635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 357635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName = name.middleName + name.givenNames; 358635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 359635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 360635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 361635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 362635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // If a single word parse that word up. 
363635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.givenNames != null && name.familyName == null && name.middleName == null) { 364635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = fullName.length(); 365635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (length == 2) { 366635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = fullName.substring(0, 1); 367635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = fullName.substring(1); 368635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (length == 3) { 369635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = fullName.substring(0, 1); 370635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName = fullName.substring(1, 2); 371635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = fullName.substring(2); 372635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (length == 4) { 373635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = fullName.substring(0, 2); 374635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName = fullName.substring(2, 3); 375635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = fullName.substring(3); 376635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 377635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 378635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 379635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 380635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 381635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 382635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Splits a full name composed according to the Japanese tradition: 383635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * <pre> 384635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * [family name] given name(s) 
385635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * </pre> 386635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 387635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private void splitJapaneseOrKoreanName(Name name, String fullName) { 388635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov StringTokenizer tokenizer = new StringTokenizer(fullName); 389635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (tokenizer.hasMoreTokens()) { 390635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String token = tokenizer.nextToken(); 391635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.givenNames == null) { 392635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 393635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (name.familyName == null) { 394635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = name.givenNames; 395635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 396635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 397635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames += " " + token; 398635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 399635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 400635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 401635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 402635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 403622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey * Flattens the given {@link Name} into a single field, usually for storage 404622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey * in {@link StructuredName#DISPLAY_NAME}. 
405622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey */ 406622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey public String join(Name name) { 407622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey final boolean hasGiven = !TextUtils.isEmpty(name.givenNames); 408622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey final boolean hasFamily = !TextUtils.isEmpty(name.familyName); 409622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey 410622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey // TODO: write locale-specific blending logic here 411622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey if (hasGiven && hasFamily) { 412622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey return name.givenNames + " " + name.familyName; 413622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey } else if (hasFamily) { 414622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey return name.familyName; 415622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey } else if (hasGiven) { 416622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey return name.givenNames; 417622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey } else { 418622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey return null; 419622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey } 420622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey } 421622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey 422622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey /** 423635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Concatenates components of a name according to the rules dictated by the name style. 
424635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * 425635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * @param givenNameFirst is ignored for CJK display name styles 426635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 427635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov public String join(Name name, boolean givenNameFirst) { 428635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov switch (name.fullNameStyle) { 429635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov case FullNameStyle.CHINESE: 430635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov case FullNameStyle.KOREAN: 431635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return join(name.familyName, name.middleName, name.givenNames, name.suffix, 432635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov false, false, false); 433635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 434635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov case FullNameStyle.JAPANESE: 435635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return join(name.familyName, name.middleName, name.givenNames, name.suffix, 436635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov true, false, false); 437635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 438635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov default: 439635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (givenNameFirst) { 440635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return join(name.givenNames, name.middleName, name.familyName, name.suffix, 441635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov true, false, true); 442635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 443635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return join(name.familyName, name.givenNames, name.middleName, name.suffix, 444635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov true, true, true); 445635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 
446635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 447635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 448635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 449635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 450635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Concatenates parts of a full name inserting spaces and commas as specified. 451635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 452635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private String join(String part1, String part2, String part3, String suffix, 453635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) { 454635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean hasPart1 = !TextUtils.isEmpty(part1); 455635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean hasPart2 = !TextUtils.isEmpty(part2); 456635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean hasPart3 = !TextUtils.isEmpty(part3); 457635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean hasSuffix = !TextUtils.isEmpty(suffix); 458635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 459635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean isSingleWord = true; 460635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String singleWord = null; 461635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart1) { 462635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov singleWord = part1; 463635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 464635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 465635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart2) { 466635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (singleWord != null) { 467635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov isSingleWord = false; 468635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 
469635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov singleWord = part2; 470635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 471635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 472635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 473635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart3) { 474635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (singleWord != null) { 475635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov isSingleWord = false; 476635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 477635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov singleWord = part3; 478635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 479635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 480635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 481635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasSuffix) { 482635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (singleWord != null) { 483635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov isSingleWord = false; 484635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 485635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov singleWord = normalizedSuffix(suffix); 486635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 487635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 488635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 489635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isSingleWord) { 490635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return singleWord; 491635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 492635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 493635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov StringBuilder sb = new StringBuilder(); 494635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart1) { 495635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(part1); 
496635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 497635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 498635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart2) { 499635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart1) { 500635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useCommaAfterPart1) { 501635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(','); 502635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 503635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useSpace) { 504635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(' '); 505635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 506635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 507635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(part2); 508635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 509635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 510635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart3) { 511635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart1 || hasPart2) { 512635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useSpace) { 513635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(' '); 514635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 515635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 516635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(part3); 517635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 518635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 519635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasSuffix) { 520635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart1 || hasPart2 || hasPart3) { 521635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useCommaAfterPart3) { 522635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(','); 
523635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 524635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useSpace) { 525635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(' '); 526635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 527635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 528635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(normalizedSuffix(suffix)); 529635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 530635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 531635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return sb.toString(); 532635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 533635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 534635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 535635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Puts a dot after the supplied suffix if that is the accepted form of the suffix, 536635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * e.g. "Jr." and "Sr.", but not "I", "II" and "III". 
537635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 538635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private String normalizedSuffix(String suffix) { 539635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = suffix.length(); 540635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (length == 0 || suffix.charAt(length - 1) == '.') { 541635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return suffix; 542635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 543635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 544635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String withDot = suffix + '.'; 545635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (mSuffixesSet.contains(withDot.toUpperCase())) { 546635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return withDot; 547635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 548635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return suffix; 549635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 550635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 551635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 552635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 553635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * If the supplied name style is undefined, returns a default based on the language, 554635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * otherwise returns the supplied name style itself. 555635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * 556635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * @param nameStyle See {@link FullNameStyle}. 
557635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 558635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov public int getAdjustedFullNameStyle(int nameStyle) { 559635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (nameStyle == FullNameStyle.UNDEFINED) { 560635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (JAPANESE_LANGUAGE.equals(mLanguage)) { 561635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.JAPANESE; 562635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (KOREAN_LANGUAGE.equals(mLanguage)) { 563635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.KOREAN; 564635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (CHINESE_LANGUAGE.equals(mLanguage)) { 565635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.CHINESE; 566635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 567635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.WESTERN; 568635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 569635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (nameStyle == FullNameStyle.CJK) { 570635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (JAPANESE_LANGUAGE.equals(mLanguage)) { 571635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.JAPANESE; 572635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (KOREAN_LANGUAGE.equals(mLanguage)) { 573635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.KOREAN; 574635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 575635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.CHINESE; 576635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 577635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 578635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return nameStyle; 579635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri 
Plotnikov } 580635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 581635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 5824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Parses the first word from the name if it is a prefix. 5834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 5844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parsePrefix(Name name, NameTokenizer tokens) { 5854097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 5864097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 5874097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 5884097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 5894097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov String firstToken = tokens.mTokens[tokens.mStartPointer]; 5904097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (mPrefixesSet.contains(firstToken.toUpperCase())) { 5914097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.prefix = firstToken; 5924097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mStartPointer++; 5934097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 5944097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 5954097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 5964097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov /** 5974097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Parses the last word(s) from the name if it is a suffix. 
5984097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 5994097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parseSuffix(Name name, NameTokenizer tokens) { 6004097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 6014097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 6024097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6034097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 6044097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov String lastToken = tokens.mTokens[tokens.mEndPointer - 1]; 6054097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (lastToken.length() > mMaxSuffixLength) { 6064097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 6074097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6084097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 6094097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov String normalized = lastToken.toUpperCase(); 6104097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (mSuffixesSet.contains(normalized)) { 6114097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.suffix = lastToken; 6124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer--; 6134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 6144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 6164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.hasDot(tokens.mEndPointer - 1)) { 6174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov lastToken += '.'; 6184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6194097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov normalized += "."; 6204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 6214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov // Take care of suffixes like M.D. and D.D.S. 
6224097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov int pos = tokens.mEndPointer - 1; 6234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov while (normalized.length() <= mMaxSuffixLength) { 6244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 6254097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (mSuffixesSet.contains(normalized)) { 6264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.suffix = lastToken; 6274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer = pos; 6284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 6294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 6314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (pos == tokens.mStartPointer) { 6324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov break; 6334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 6354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov pos--; 6364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.hasDot(pos)) { 6374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov lastToken = tokens.mTokens[pos] + "." + lastToken; 6384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } else { 6394097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov lastToken = tokens.mTokens[pos] + " " + lastToken; 6404097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6414097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 6424097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov normalized = tokens.mTokens[pos].toUpperCase() + "." 
+ normalized; 6434097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6444097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6454097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 6464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parseLastName(Name name, NameTokenizer tokens) { 6474097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 6484097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 6494097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6504097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 651635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // If the first word is followed by a comma, assume that it's the family name 652635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (tokens.hasComma(tokens.mStartPointer)) { 653635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = tokens.mTokens[tokens.mStartPointer]; 654635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov tokens.mStartPointer++; 655635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 656635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 657635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 658635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // If the second word is followed by a comma and the first word 659635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // is a last name prefix as in "de Sade" and "von Cliburn", treat 660635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // the first two words as the family name. 
661635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (tokens.mStartPointer + 1 < tokens.mEndPointer 662635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov && tokens.hasComma(tokens.mStartPointer + 1) 663635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) { 664635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String familyNamePrefix = tokens.mTokens[tokens.mStartPointer]; 665635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (tokens.hasDot(tokens.mStartPointer)) { 666635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov familyNamePrefix += '.'; 667635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 668635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1]; 669635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov tokens.mStartPointer += 2; 670635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 671635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 672635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 673635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // Finally, assume that the last word is the last name 6744097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.familyName = tokens.mTokens[tokens.mEndPointer - 1]; 6754097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer--; 6764097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 677635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // Take care of last names like "de Sade" and "von Cliburn" 6784097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if ((tokens.mEndPointer - tokens.mStartPointer) > 0) { 6794097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1]; 680635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isFamilyNamePrefix(lastNamePrefix)) { 
6814097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.hasDot(tokens.mEndPointer - 1)) { 6824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov lastNamePrefix += '.'; 6834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.familyName = lastNamePrefix + " " + name.familyName; 6854097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer--; 6864097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6874097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6884097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 6894097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 690635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 691635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de" 692635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 693635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private boolean isFamilyNamePrefix(String word) { 694635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov final String normalized = word.toUpperCase(); 695635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 696635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return mLastNamePrefixesSet.contains(normalized) 697635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || mLastNamePrefixesSet.contains(normalized + "."); 698635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 699635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 7004097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7014097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parseMiddleName(Name name, NameTokenizer tokens) { 7024097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 7034097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 7044097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri 
Plotnikov } 7054097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7064097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if ((tokens.mEndPointer - tokens.mStartPointer) > 1) { 7074097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if ((tokens.mEndPointer - tokens.mStartPointer) == 2 7084097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2]. 7094097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov toUpperCase())) { 7104097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.middleName = tokens.mTokens[tokens.mEndPointer - 1]; 711635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (tokens.hasDot(tokens.mEndPointer - 1)) { 712635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName += '.'; 713635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 7144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer--; 7154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7194097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parseGivenNames(Name name, NameTokenizer tokens) { 7204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 7214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 7224097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if ((tokens.mEndPointer - tokens.mStartPointer) == 1) { 7254097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.givenNames = tokens.mTokens[tokens.mStartPointer]; 7264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } else { 7274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov StringBuilder 
sb = new StringBuilder(); 7284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) { 7294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (i != tokens.mStartPointer) { 7304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov sb.append(' '); 7314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov sb.append(tokens.mTokens[i]); 7334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.hasDot(i)) { 7344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov sb.append('.'); 7354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.givenNames = sb.toString(); 7384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7394097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 740635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 741635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 742635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Makes the best guess at the expected full name style based on the character set 743635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * used in the supplied name. If the phonetic name is also supplied, tries to 744635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * differentiate between Chinese, Japanese and Korean based on the alphabet used 745635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * for the phonetic name. 
746635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 747635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov public void guessNameStyle(Name name) { 748635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov guessFullNameStyle(name); 749635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov guessPhoneticNameStyle(name); 750635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 751635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // If we were unsure about display name style based on the name components, 752635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // but phonetic name makes it more definitive, update the display name style 753635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // according to the phonetic name style. 754635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) { 755635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.fullNameStyle == FullNameStyle.UNDEFINED 756635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || name.fullNameStyle == FullNameStyle.CJK) { 757635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.phoneticNameStyle == PhoneticNameStyle.JAPANESE) { 758635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = FullNameStyle.JAPANESE; 759635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (name.phoneticNameStyle == PhoneticNameStyle.KOREAN) { 760635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = FullNameStyle.KOREAN; 761635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 762635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.fullNameStyle == FullNameStyle.CJK 763635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov && name.phoneticNameStyle == PhoneticNameStyle.PINYIN) { 764635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = FullNameStyle.CHINESE; 765635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri 
Plotnikov } 766635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 767635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 768635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 769635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 770635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 771635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Makes the best guess at the expected full name style based on the character set 772635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * used in the supplied name. 773635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 774635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private void guessFullNameStyle(NameSplitter.Name name) { 775635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int bestGuess = guessFullNameStyle(name.givenNames); 776635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) { 777635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = bestGuess; 778635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 779635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 780635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 781635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int guess = guessFullNameStyle(name.familyName); 782635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 783635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.CJK) { 784635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = guess; 785635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 786635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 787635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov bestGuess = guess; 788635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 789635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 
790635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov guess = guessFullNameStyle(name.middleName); 791635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 792635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.CJK) { 793635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = guess; 794635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 795635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 796635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov bestGuess = guess; 797635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 798635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 799635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = bestGuess; 800635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 801635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 802635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private int guessFullNameStyle(String name) { 803635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name == null) { 804635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.UNDEFINED; 805635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 806635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 807635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int nameStyle = FullNameStyle.UNDEFINED; 808635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = name.length(); 809635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int offset = 0; 810635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (offset < length) { 811635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int codePoint = Character.codePointAt(name, offset); 812635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (Character.isLetter(codePoint)) { 813635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov UnicodeBlock unicodeBlock = 
UnicodeBlock.of(codePoint); 814635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 815635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isLatinUnicodeBlock(unicodeBlock)) { 816635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.WESTERN; 817635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 818635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 819635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isCJKUnicodeBlock(unicodeBlock)) { 820635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 821635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // We don't know if this is Chinese, Japanese or Korean - 822635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // trying to figure out by looking at other characters in the name 823635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return guessCJKNameStyle(name, offset + Character.charCount(codePoint)); 824635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 825635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 826635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { 827635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.JAPANESE; 828635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 829635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 830635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isKoreanUnicodeBlock(unicodeBlock)) { 831635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.KOREAN; 832635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 833635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 834635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.WESTERN; 835635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 836635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov offset += Character.charCount(codePoint); 
837635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 838635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return nameStyle; 839635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 840635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 841635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private int guessCJKNameStyle(String name, int offset) { 842635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = name.length(); 843635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (offset < length) { 844635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int codePoint = Character.codePointAt(name, offset); 845635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (Character.isLetter(codePoint)) { 846635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); 847635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { 848635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.JAPANESE; 849635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 850635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isKoreanUnicodeBlock(unicodeBlock)) { 851635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.KOREAN; 852635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 853635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 854635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov offset += Character.charCount(codePoint); 855635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 856635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 857635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.CJK; 858635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 859635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 860635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private void 
guessPhoneticNameStyle(NameSplitter.Name name) { 861635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName); 862635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) { 863635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.phoneticNameStyle = bestGuess; 864635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 865635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 866635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 867635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int guess = guessPhoneticNameStyle(name.phoneticGivenName); 868635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 869635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.CJK) { 870635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.phoneticNameStyle = guess; 871635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 872635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 873635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov bestGuess = guess; 874635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 875635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 876635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov guess = guessPhoneticNameStyle(name.phoneticMiddleName); 877635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 878635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.CJK) { 879635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.phoneticNameStyle = guess; 880635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 881635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 882635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov bestGuess = guess; 883635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri 
Plotnikov } 884635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 885635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 886635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private int guessPhoneticNameStyle(String name) { 887635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name == null) { 888635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return PhoneticNameStyle.UNDEFINED; 889635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 890635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 891635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int nameStyle = PhoneticNameStyle.UNDEFINED; 892635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = name.length(); 893635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int offset = 0; 894635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (offset < length) { 895635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int codePoint = Character.codePointAt(name, offset); 896635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (Character.isLetter(codePoint)) { 897635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); 898635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { 899635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return PhoneticNameStyle.JAPANESE; 900635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 901635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isKoreanUnicodeBlock(unicodeBlock)) { 902635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return PhoneticNameStyle.KOREAN; 903635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 904635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isLatinUnicodeBlock(unicodeBlock)) { 905635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return PhoneticNameStyle.PINYIN; 906635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri 
Plotnikov } 907635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 908635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov offset += Character.charCount(codePoint); 909635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 910635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 911635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return nameStyle; 912635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 913635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 914635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) { 915635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return unicodeBlock == UnicodeBlock.BASIC_LATIN || 916635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT || 917635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A || 918635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B || 919635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL; 920635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 921635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 922635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static boolean isCJKUnicodeBlock(UnicodeBlock block) { 923635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS 924635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 925635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 926635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION 927635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == 
UnicodeBlock.CJK_RADICALS_SUPPLEMENT 928635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_COMPATIBILITY 929635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS 930635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS 931635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT; 932635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 933635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 934635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) { 935635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES || 936635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.HANGUL_JAMO || 937635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO; 938635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 939635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 940635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) { 941635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return unicodeBlock == UnicodeBlock.KATAKANA || 942635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS || 943635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS || 944635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.HIRAGANA; 945635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 9464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov} 947