14097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov/* 24097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Copyright (C) 2009 The Android Open Source Project 34097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * 44097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License"); 54097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * you may not use this file except in compliance with the License. 64097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * You may obtain a copy of the License at 74097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * 84097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * http://www.apache.org/licenses/LICENSE-2.0 94097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * 104097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Unless required by applicable law or agreed to in writing, software 114097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS, 124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * See the License for the specific language governing permissions and 144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * limitations under the License 154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts; 174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 18622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkeyimport android.content.ContentValues; 1938210445730ee04c351c7cc1b3800cfe23e34325Makoto Onukiimport android.provider.ContactsContract.CommonDataKinds.StructuredName; 20635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport android.provider.ContactsContract.FullNameStyle; 21635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport android.provider.ContactsContract.PhoneticNameStyle; 22f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikovimport android.text.TextUtils; 23622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey 2438210445730ee04c351c7cc1b3800cfe23e34325Makoto Onukiimport com.android.providers.contacts.util.NeededForTesting; 2538210445730ee04c351c7cc1b3800cfe23e34325Makoto Onuki 26635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport java.lang.Character.UnicodeBlock; 274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovimport java.util.HashSet; 28622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkeyimport java.util.Locale; 294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovimport java.util.StringTokenizer; 304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov/** 324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * The purpose of this class is to split a full name into given names and last 334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * name. The logic only supports having a single last name. 
If the full name has 344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * multiple last names the output will be incorrect. 354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <p> 364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Core algorithm: 374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <ol> 384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Remove the suffixes (III, Ph.D., M.D.).</li> 394097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li> 404097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Assign the last remaining token as the last name.</li> 414097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>If the previous word to the last name is one from LASTNAME_PREFIXES, use 424097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * this word also as the last name.</li> 434097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Assign the rest of the words as the "given names".</li> 444097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * </ol> 454097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovpublic class NameSplitter { 474097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 48f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov public static final int MAX_TOKENS = 10; 49f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov 50635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase(); 51635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase(); 52635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 53635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // This includes simplified and traditional Chinese 54635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri 
// Language code shared by the simplified and traditional Chinese locales.
private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();

// Upper-cased lookup sets built once in the constructor from comma-separated lists.
private final HashSet<String> mPrefixesSet;
private final HashSet<String> mSuffixesSet;
// Length of the longest entry in mSuffixesSet.
private final int mMaxSuffixLength;
private final HashSet<String> mLastNamePrefixesSet;
private final HashSet<String> mConjuctions;
private final Locale mLocale;
// Lower-cased language code of mLocale.
private final String mLanguage;

/**
 * Two-character long Korean family names.
 * http://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EB%B3%B5%EC%84%B1
 */
private static final String[] KOREAN_TWO_CHARCTER_FAMILY_NAMES = {
    "\uAC15\uC804", // Gang Jeon
    "\uB0A8\uAD81", // Nam Goong
    "\uB3C5\uACE0", // Dok Go
    "\uB3D9\uBC29", // Dong Bang
    "\uB9DD\uC808", // Mang Jeol
    "\uC0AC\uACF5", // Sa Gong
    "\uC11C\uBB38", // Seo Moon
    "\uC120\uC6B0", // Seon Woo
    "\uC18C\uBD09", // So Bong
    "\uC5B4\uAE08", // Uh Geum
    "\uC7A5\uACE1", // Jang Gok
    "\uC81C\uAC08", // Je Gal
    "\uD669\uBCF4" // Hwang Bo
};

/**
 * Mutable holder for the components of a structured name, together with the
 * full-name and phonetic-name style codes. Fields are public and may be read
 * or written directly.
 */
public static class Name {
    public String prefix;
    public String givenNames;
    public String middleName;
    public String familyName;
    public String suffix;

    public int fullNameStyle;

    public String phoneticFamilyName;
    public String phoneticMiddleName;
    public String phoneticGivenName;

    public int phoneticNameStyle;

    /** Creates a name with every field left at its Java default value. */
    public Name() {
    }

    /**
     * Creates a name from its five textual components. The phonetic fields
     * and both style codes are left at their Java default values.
     */
    public Name(String prefix, String givenNames, String middleName, String familyName,
            String suffix) {
        this.prefix = prefix;
        this.givenNames = givenNames;
        this.middleName = middleName;
        this.familyName = familyName;
        this.suffix = suffix;
    }

    @NeededForTesting
    public String getPrefix() {
        return prefix;
    }

    public String getGivenNames() {
        return givenNames;
    }

    public String getMiddleName() {
        return middleName;
    }

    public String getFamilyName() {
        return familyName;
    }

    @NeededForTesting
    public String getSuffix() {
        return suffix;
    }

    public int getFullNameStyle() {
        return fullNameStyle;
    }

    public String getPhoneticFamilyName() {
        return phoneticFamilyName;
    }

    public String getPhoneticMiddleName() {
        return phoneticMiddleName;
    }

    public String getPhoneticGivenName() {
        return phoneticGivenName;
    }

    public int getPhoneticNameStyle() {
        return phoneticNameStyle;
    }

    /**
     * Overwrites every field of this object with the corresponding
     * {@link StructuredName} column read from {@code values}. A style column
     * for which {@code getAsInteger} returns null is stored as
     * {@code UNDEFINED}.
     */
    public void fromValues(ContentValues values) {
        prefix = values.getAsString(StructuredName.PREFIX);
        givenNames = values.getAsString(StructuredName.GIVEN_NAME);
        middleName = values.getAsString(StructuredName.MIDDLE_NAME);
        familyName = values.getAsString(StructuredName.FAMILY_NAME);
        suffix = values.getAsString(StructuredName.SUFFIX);

        final Integer fullStyle = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
        fullNameStyle = fullStyle == null ? FullNameStyle.UNDEFINED : fullStyle;

        phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
        phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
        phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);

        final Integer phoneticStyle = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
        phoneticNameStyle = phoneticStyle == null ? PhoneticNameStyle.UNDEFINED : phoneticStyle;
    }

    /**
     * Writes this name into {@code values}. String components are written
     * only when they are non-null; the two style codes are always written.
     */
    public void toValues(ContentValues values) {
        putValueIfPresent(values, StructuredName.PREFIX, prefix);
        putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
        putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
        putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
        putValueIfPresent(values, StructuredName.SUFFIX, suffix);
        values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
        putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
        putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
        putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
        values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
    }

    /** Puts {@code value} under {@code name} unless {@code value} is null. */
    private void putValueIfPresent(ContentValues values, String name, String value) {
        if (value != null) {
            values.put(name, value);
        }
    }

    /** Resets all string fields to null and both style codes to {@code UNDEFINED}. */
    public void clear() {
        prefix = null;
        givenNames = null;
        middleName = null;
        familyName = null;
        suffix = null;
        fullNameStyle = FullNameStyle.UNDEFINED;
        phoneticFamilyName = null;
        phoneticMiddleName = null;
        phoneticGivenName = null;
        phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
    }

    /**
     * Returns true when the given, middle and family names, the suffix and
     * all three phonetic components are null or empty. The prefix is not
     * part of this check.
     */
    public boolean isEmpty() {
        return TextUtils.isEmpty(givenNames)
                && TextUtils.isEmpty(middleName)
                && TextUtils.isEmpty(familyName)
                && TextUtils.isEmpty(suffix)
                && TextUtils.isEmpty(phoneticFamilyName)
                && TextUtils.isEmpty(phoneticMiddleName)
                && TextUtils.isEmpty(phoneticGivenName);
    }

    /** Returns a debug string listing the textual components (style codes are omitted). */
    @Override
    public String toString() {
        return "[prefix: " + prefix + " given: " + givenNames + " middle: " + middleName
                + " family: " + familyName + " suffix: " + suffix + " ph/given: "
                + phoneticGivenName + " ph/middle: " + phoneticMiddleName + " ph/family: "
                + phoneticFamilyName + "]";
    }
}

/**
 * Splits a full name on spaces, dots and commas and keeps at most
 * {@code MAX_TOKENS} tokens. For each stored token it records, in a bitmask,
 * whether a dot or a comma was seen after it.
 */
private static class NameTokenizer extends StringTokenizer {
    private final String[] mTokens;
    private int mDotBitmask;
    private int mCommaBitmask;
    // Half-open range [mStartPointer, mEndPointer) of tokens currently in use.
    private int mStartPointer;
    private int mEndPointer;

    public NameTokenizer(String fullName) {
        // returnDelims == true: each delimiter comes back as its own one-character token.
        super(fullName, " .,", true);

        mTokens = new String[MAX_TOKENS];

        // Iterate over tokens, dropping spaces and marking tokens that are
        // followed by dots or commas.
        while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
            final String token = nextToken();
            // StringTokenizer never returns an empty token, so charAt(0) is safe.
            final char first = token.charAt(0);
            if (first == ' ') {
                continue;
            }

            if (mEndPointer > 0 && first == '.') {
                mDotBitmask |= (1 << (mEndPointer - 1));
            } else if (mEndPointer > 0 && first == ',') {
                mCommaBitmask |= (1 << (mEndPointer - 1));
            } else {
                // Also reached by a dot or comma seen before any other token:
                // it is stored as a token of its own.
                mTokens[mEndPointer] = token;
                mEndPointer++;
            }
        }
    }

    /**
     * Returns true if the token is followed by a dot in the original full name.
     */
    public boolean hasDot(int index) {
        return (mDotBitmask & (1 << index)) != 0;
    }

    /**
     * Returns true if the token is followed by a comma in the original full name.
     */
    public boolean hasComma(int index) {
        return (mCommaBitmask & (1 << index)) != 0;
    }
}

/**
 * Constructor.
 *
 * @param commonPrefixes comma-separated list of common prefixes,
 *            e.g. "Mr, Ms, Mrs"
 * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
 *            e.g. "d', st, st., von"
 * @param commonSuffixes comma-separated list of common suffixes,
 *            e.g. "Jr, M.D., MD, D.D.S."
 * @param commonConjunctions comma-separated list of common conjunctions,
 *            e.g. "AND, Or"
 * @param locale locale whose language code is remembered by this splitter;
 *            when null, {@link Locale#getDefault()} is used
 */
public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
        String commonSuffixes, String commonConjunctions, Locale locale) {
    // TODO: refactor this to use <string-array> resources
    mPrefixesSet = convertToSet(commonPrefixes);
    mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
    mSuffixesSet = convertToSet(commonSuffixes);
    mConjuctions = convertToSet(commonConjunctions);
    mLocale = locale != null ? locale : Locale.getDefault();
    mLanguage = mLocale.getLanguage().toLowerCase();

    // Remember the length of the longest known suffix.
    int maxLength = 0;
    for (String suffix : mSuffixesSet) {
        if (suffix.length() > maxLength) {
            maxLength = suffix.length();
        }
    }

    mMaxSuffixLength = maxLength;
}

/**
 * Converts a comma-separated list of Strings to a set of Strings. Trims strings
 * and converts them to upper case. A null list yields an empty set.
 */
private static HashSet<String> convertToSet(String strings) {
    HashSet<String> set = new HashSet<String>();
    if (strings != null) {
        for (String item : strings.split(",")) {
            set.add(item.trim().toUpperCase());
        }
    }
    return set;
}

/**
 * Parses a full name and returns components as a list of tokens.
 *
 * @param tokens output array, filled from index 0; at most {@code MAX_TOKENS}
 *            entries are written, so it must be at least that long to be
 *            safe for any input
 * @param fullName the name to tokenize; may be null
 * @return the number of tokens written, or 0 when {@code fullName} is null
 *         or yields no tokens
 */
public int tokenize(String[] tokens, String fullName) {
    if (fullName == null) {
        return 0;
    }

    NameTokenizer tokenizer = new NameTokenizer(fullName);

    if (tokenizer.mStartPointer == tokenizer.mEndPointer) {
        return 0;
    }

    int count = 0;
    for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) {
        tokens[count++] = tokenizer.mTokens[i];
    }

    return count;
}

/**
 * Parses a full name and returns parsed components in the Name object.
 * The name style is guessed from {@code fullName}; a guess of
 * {@code FullNameStyle.CJK} is passed through
 * {@code getAdjustedFullNameStyle} before splitting. Does nothing when
 * {@code fullName} is null.
 */
public void split(Name name, String fullName) {
    if (fullName == null) {
        return;
    }

    int fullNameStyle = guessFullNameStyle(fullName);
    if (fullNameStyle == FullNameStyle.CJK) {
        fullNameStyle = getAdjustedFullNameStyle(fullNameStyle);
    }

    split(name, fullName, fullNameStyle);
}

/**
 * Parses a full name and returns parsed components in the Name object
 * with a given fullNameStyle. The style is stored in
 * {@code name.fullNameStyle} and selects the Chinese, Japanese or Korean
 * splitter; every other style value uses the Western splitter. Does nothing
 * when {@code fullName} is null.
 */
public void split(Name name, String fullName, int fullNameStyle) {
    if (fullName == null) {
        return;
    }

    name.fullNameStyle = fullNameStyle;

    switch (fullNameStyle) {
        case FullNameStyle.CHINESE:
            splitChineseName(name, fullName);
            break;

        case FullNameStyle.JAPANESE:
            splitJapaneseName(name, fullName);
            break;

        case FullNameStyle.KOREAN:
            splitKoreanName(name, fullName);
            break;

        default:
            splitWesternName(name, fullName);
    }
}

/** 388635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Splits a full name composed according to the Western tradition: 389635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * <pre> 390635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * [prefix] given name(s) [[middle name] family name] [, suffix] 391635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * [prefix] family name, given name [middle name] [,suffix] 392635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * </pre> 393635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 394635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private void splitWesternName(Name name, String fullName) { 3954097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov NameTokenizer tokens = new NameTokenizer(fullName); 3964097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov parsePrefix(name, tokens); 397c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov 398c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov // If the name consists of just one or two tokens, treat them as first/last name, 399c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov // not as suffix. Example: John Ma; Ma is last name, not "M.A.". 
400c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov if (tokens.mEndPointer > 2) { 401c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov parseSuffix(name, tokens); 402c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov } 403c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov 404c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) { 405c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov name.givenNames = tokens.mTokens[tokens.mStartPointer]; 406c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov } else { 407c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov parseLastName(name, tokens); 408c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov parseMiddleName(name, tokens); 409c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov parseGivenNames(name, tokens); 410c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov } 4114097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 4124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 4134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov /** 414635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Splits a full name composed according to the Chinese tradition: 415635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * <pre> 416635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * [family name [middle name]] given name 417635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * </pre> 418635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 419635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private void splitChineseName(Name name, String fullName) { 420635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov StringTokenizer tokenizer = new StringTokenizer(fullName); 421635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (tokenizer.hasMoreTokens()) { 422635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String token = tokenizer.nextToken(); 
423635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.givenNames == null) { 424635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 425635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (name.familyName == null) { 426635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = name.givenNames; 427635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 428635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (name.middleName == null) { 429635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName = name.givenNames; 430635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 431635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 432635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName = name.middleName + name.givenNames; 433635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 434635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 435635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 436635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 437635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // If a single word parse that word up. 
438635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.givenNames != null && name.familyName == null && name.middleName == null) { 439635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = fullName.length(); 440635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (length == 2) { 441635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = fullName.substring(0, 1); 442635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = fullName.substring(1); 443635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (length == 3) { 444635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = fullName.substring(0, 1); 445635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName = fullName.substring(1, 2); 446635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = fullName.substring(2); 447635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (length == 4) { 448635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = fullName.substring(0, 2); 449635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName = fullName.substring(2, 3); 450635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = fullName.substring(3); 451635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 452635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 453635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 454635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 455635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 456635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 457635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Splits a full name composed according to the Japanese tradition: 458635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * <pre> 459635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * [family name] given name(s) 
460635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * </pre> 461635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 46256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee private void splitJapaneseName(Name name, String fullName) { 463635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov StringTokenizer tokenizer = new StringTokenizer(fullName); 464635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (tokenizer.hasMoreTokens()) { 465635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String token = tokenizer.nextToken(); 466635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name.givenNames == null) { 467635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 468635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (name.familyName == null) { 469635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = name.givenNames; 470635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames = token; 471635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 472635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.givenNames += " " + token; 473635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 474635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 475635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 476635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 477635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 47856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee * Splits a full name composed according to the Korean tradition: 47956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee * <pre> 48056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee * [family name] given name(s) 48156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee * </pre> 48256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee */ 48356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee private void splitKoreanName(Name name, String fullName) { 
48456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee StringTokenizer tokenizer = new StringTokenizer(fullName); 48556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee if (tokenizer.countTokens() > 1) { 48656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee // Each name can be identified by separators. 48756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee while (tokenizer.hasMoreTokens()) { 48856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee String token = tokenizer.nextToken(); 48956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee if (name.givenNames == null) { 49056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee name.givenNames = token; 49156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } else if (name.familyName == null) { 49256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee name.familyName = name.givenNames; 49356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee name.givenNames = token; 49456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } else { 49556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee name.givenNames += " " + token; 49656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } 49756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } 49856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } else { 49956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee // There is no separator. Try to guess family name. 50056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee // The length of most family names is 1. 50156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee int familyNameLength = 1; 50256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee 50356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee // Compare with 2-length family names. 
50456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee for (String twoLengthFamilyName : KOREAN_TWO_CHARCTER_FAMILY_NAMES) { 50556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee if (fullName.startsWith(twoLengthFamilyName)) { 50656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee familyNameLength = 2; 50756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee break; 50856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } 50956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } 51056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee 51156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee name.familyName = fullName.substring(0, familyNameLength); 51256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee if (fullName.length() > familyNameLength) { 51356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee name.givenNames = fullName.substring(familyNameLength); 51456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } 51556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } 51656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee } 51756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee 51856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee /** 519635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Concatenates components of a name according to the rules dictated by the name style. 520635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * 521635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * @param givenNameFirst is ignored for CJK display name styles 522635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 52355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov public String join(Name name, boolean givenNameFirst, boolean includePrefix) { 52455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov String prefix = includePrefix ? 
name.prefix : null; 525635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov switch (name.fullNameStyle) { 526cdd03b2ba03718a7fa85663a2438136284a1557cBai Tao case FullNameStyle.CJK: 527635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov case FullNameStyle.CHINESE: 528635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov case FullNameStyle.KOREAN: 52955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov return join(prefix, name.familyName, name.middleName, name.givenNames, 53055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov name.suffix, false, false, false); 531635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 532635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov case FullNameStyle.JAPANESE: 53355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov return join(prefix, name.familyName, name.middleName, name.givenNames, 53455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov name.suffix, true, false, false); 535635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 536635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov default: 537635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (givenNameFirst) { 53855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov return join(prefix, name.givenNames, name.middleName, name.familyName, 53955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov name.suffix, true, false, true); 540635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 54155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov return join(prefix, name.familyName, name.givenNames, name.middleName, 54255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov name.suffix, true, true, true); 543635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 544635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 545635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 546635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 547635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 
5485dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov * Concatenates components of the phonetic name following the CJK tradition: 5495dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov * family name + middle name + given name(s). 5505dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov */ 5515dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov public String joinPhoneticName(Name name) { 55255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov return join(null, name.phoneticFamilyName, 55355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov name.phoneticMiddleName, name.phoneticGivenName, null, true, false, false); 5545dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov } 5555dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov 5565dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov /** 557635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Concatenates parts of a full name inserting spaces and commas as specified. 558635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 55955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov private String join(String prefix, String part1, String part2, String part3, String suffix, 560635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) { 56155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov prefix = prefix == null ? null: prefix.trim(); 562dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov part1 = part1 == null ? null: part1.trim(); 563dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov part2 = part2 == null ? null: part2.trim(); 564dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov part3 = part3 == null ? null: part3.trim(); 565dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov suffix = suffix == null ? 
null: suffix.trim(); 566dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov 56755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov boolean hasPrefix = !TextUtils.isEmpty(prefix); 568635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean hasPart1 = !TextUtils.isEmpty(part1); 569635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean hasPart2 = !TextUtils.isEmpty(part2); 570635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean hasPart3 = !TextUtils.isEmpty(part3); 571635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean hasSuffix = !TextUtils.isEmpty(suffix); 572635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 573635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov boolean isSingleWord = true; 574635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String singleWord = null; 57555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov 57655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (hasPrefix) { 57755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov singleWord = prefix; 57855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 57955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov 580635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart1) { 58155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (singleWord != null) { 58255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov isSingleWord = false; 58355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } else { 58455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov singleWord = part1; 58555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 586635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 587635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 588635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart2) { 589635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (singleWord != null) { 590635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 
isSingleWord = false; 591635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 592635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov singleWord = part2; 593635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 594635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 595635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 596635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart3) { 597635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (singleWord != null) { 598635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov isSingleWord = false; 599635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 600635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov singleWord = part3; 601635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 602635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 603635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 604635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasSuffix) { 605635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (singleWord != null) { 606635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov isSingleWord = false; 607635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 608635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov singleWord = normalizedSuffix(suffix); 609635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 610635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 611635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 612635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isSingleWord) { 613635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return singleWord; 614635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 615635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 616635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov StringBuilder sb = new StringBuilder(); 61755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov 
61855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (hasPrefix) { 61955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov sb.append(prefix); 62055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 62155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov 622635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart1) { 62355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (hasPrefix) { 62455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov sb.append(' '); 62555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 626635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(part1); 627635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 628635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 629635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart2) { 63055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (hasPrefix || hasPart1) { 631635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useCommaAfterPart1) { 632635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(','); 633635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 634635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useSpace) { 635635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(' '); 636635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 637635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 638635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(part2); 639635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 640635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 641635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasPart3) { 64255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (hasPrefix || hasPart1 || hasPart2) { 643635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useSpace) { 644635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(' '); 
645635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 646635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 647635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(part3); 648635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 649635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 650635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (hasSuffix) { 65155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (hasPrefix || hasPart1 || hasPart2 || hasPart3) { 652635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useCommaAfterPart3) { 653635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(','); 654635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 655635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (useSpace) { 656635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(' '); 657635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 658635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 659635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov sb.append(normalizedSuffix(suffix)); 660635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 661635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 662635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return sb.toString(); 663635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 664635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 665635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 666635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Puts a dot after the supplied suffix if that is the accepted form of the suffix, 667635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * e.g. "Jr." and "Sr.", but not "I", "II" and "III". 
668635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 669635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private String normalizedSuffix(String suffix) { 670635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = suffix.length(); 671635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (length == 0 || suffix.charAt(length - 1) == '.') { 672635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return suffix; 673635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 674635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 675635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String withDot = suffix + '.'; 676635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (mSuffixesSet.contains(withDot.toUpperCase())) { 677635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return withDot; 678635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 679635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return suffix; 680635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 681635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 682635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 683635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 684635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * If the supplied name style is undefined, returns a default based on the language, 685635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * otherwise returns the supplied name style itself. 686635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * 687635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * @param nameStyle See {@link FullNameStyle}. 
688635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 689635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov public int getAdjustedFullNameStyle(int nameStyle) { 690635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (nameStyle == FullNameStyle.UNDEFINED) { 691635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (JAPANESE_LANGUAGE.equals(mLanguage)) { 692635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.JAPANESE; 693635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (KOREAN_LANGUAGE.equals(mLanguage)) { 694635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.KOREAN; 695635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (CHINESE_LANGUAGE.equals(mLanguage)) { 696635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.CHINESE; 697635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 698635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.WESTERN; 699635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 700635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (nameStyle == FullNameStyle.CJK) { 701635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (JAPANESE_LANGUAGE.equals(mLanguage)) { 702635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.JAPANESE; 703635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else if (KOREAN_LANGUAGE.equals(mLanguage)) { 704635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.KOREAN; 705635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } else { 706635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.CHINESE; 707635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 708635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 709635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return nameStyle; 710635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri 
Plotnikov } 711635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 712635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 7134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Parses the first word from the name if it is a prefix. 7144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 7154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parsePrefix(Name name, NameTokenizer tokens) { 7164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 7174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 7184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7194097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov String firstToken = tokens.mTokens[tokens.mStartPointer]; 7214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (mPrefixesSet.contains(firstToken.toUpperCase())) { 72217a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov if (tokens.hasDot(tokens.mStartPointer)) { 72317a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov firstToken += '.'; 72417a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov } 7254097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.prefix = firstToken; 7264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mStartPointer++; 7274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov /** 7314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Parses the last word(s) from the name if it is a suffix. 
7324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */ 7334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parseSuffix(Name name, NameTokenizer tokens) { 7344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 7354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 7364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov String lastToken = tokens.mTokens[tokens.mEndPointer - 1]; 73955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov 74055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov // Take care of an explicit comma-separated suffix 74155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (tokens.mEndPointer - tokens.mStartPointer > 2 74255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov && tokens.hasComma(tokens.mEndPointer - 2)) { 74355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (tokens.hasDot(tokens.mEndPointer - 1)) { 74455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov lastToken += '.'; 74555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 74655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov name.suffix = lastToken; 74755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov tokens.mEndPointer--; 74855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov return; 74955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 75055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov 7514097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (lastToken.length() > mMaxSuffixLength) { 7524097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 7534097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7544097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7554097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov String normalized = lastToken.toUpperCase(); 
7564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (mSuffixesSet.contains(normalized)) { 7574097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.suffix = lastToken; 7584097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer--; 7594097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 7604097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7614097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7624097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.hasDot(tokens.mEndPointer - 1)) { 7634097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov lastToken += '.'; 7644097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7654097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov normalized += "."; 7664097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7674097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov // Take care of suffixes like M.D. and D.D.S. 7684097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov int pos = tokens.mEndPointer - 1; 7694097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov while (normalized.length() <= mMaxSuffixLength) { 7704097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7714097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (mSuffixesSet.contains(normalized)) { 7724097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.suffix = lastToken; 7734097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer = pos; 7744097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 7754097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7764097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7774097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (pos == tokens.mStartPointer) { 7784097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov break; 7794097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7804097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 
7814097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov pos--; 7824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.hasDot(pos)) { 7834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov lastToken = tokens.mTokens[pos] + "." + lastToken; 7844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } else { 7854097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov lastToken = tokens.mTokens[pos] + " " + lastToken; 7864097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7874097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7884097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized; 7894097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7904097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7914097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 7924097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parseLastName(Name name, NameTokenizer tokens) { 7934097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 7944097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 7954097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 7964097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 797635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // If the first word is followed by a comma, assume that it's the family name 798635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (tokens.hasComma(tokens.mStartPointer)) { 799635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = tokens.mTokens[tokens.mStartPointer]; 800635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov tokens.mStartPointer++; 801635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 802635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 803635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 804635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // If 
the second word is followed by a comma and the first word 805635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // is a last name prefix as in "de Sade" and "von Cliburn", treat 806635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // the first two words as the family name. 807635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (tokens.mStartPointer + 1 < tokens.mEndPointer 808635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov && tokens.hasComma(tokens.mStartPointer + 1) 809635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) { 810635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov String familyNamePrefix = tokens.mTokens[tokens.mStartPointer]; 811635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (tokens.hasDot(tokens.mStartPointer)) { 812635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov familyNamePrefix += '.'; 813635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 814635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1]; 815635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov tokens.mStartPointer += 2; 816635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 817635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 818635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 819635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // Finally, assume that the last word is the last name 8204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.familyName = tokens.mTokens[tokens.mEndPointer - 1]; 8214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer--; 8224097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 823635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov // Take care of last names like "de Sade" and "von Cliburn" 8244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if ((tokens.mEndPointer 
- tokens.mStartPointer) > 0) { 8254097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1]; 826635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isFamilyNamePrefix(lastNamePrefix)) { 8274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.hasDot(tokens.mEndPointer - 1)) { 8284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov lastNamePrefix += '.'; 8294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.familyName = lastNamePrefix + " " + name.familyName; 8314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer--; 8324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 836635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 837635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Returns true if the supplied word is an accepted last name prefix, e.g. 
"von", "de" 838635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 839635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private boolean isFamilyNamePrefix(String word) { 840635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov final String normalized = word.toUpperCase(); 841635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 842635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return mLastNamePrefixesSet.contains(normalized) 843635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || mLastNamePrefixesSet.contains(normalized + "."); 844635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 845635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 8464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 8474097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parseMiddleName(Name name, NameTokenizer tokens) { 8484097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 8494097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 8504097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8514097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 8524097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if ((tokens.mEndPointer - tokens.mStartPointer) > 1) { 8534097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if ((tokens.mEndPointer - tokens.mStartPointer) == 2 8544097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2]. 
8554097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov toUpperCase())) { 8564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.middleName = tokens.mTokens[tokens.mEndPointer - 1]; 857635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (tokens.hasDot(tokens.mEndPointer - 1)) { 858635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.middleName += '.'; 859635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 8604097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov tokens.mEndPointer--; 8614097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8624097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8634097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8644097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 8654097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov private void parseGivenNames(Name name, NameTokenizer tokens) { 8664097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.mStartPointer == tokens.mEndPointer) { 8674097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov return; 8684097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8694097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov 8704097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if ((tokens.mEndPointer - tokens.mStartPointer) == 1) { 8714097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.givenNames = tokens.mTokens[tokens.mStartPointer]; 8724097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } else { 8734097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov StringBuilder sb = new StringBuilder(); 8744097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) { 8754097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (i != tokens.mStartPointer) { 8764097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov sb.append(' '); 8774097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 
8784097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov sb.append(tokens.mTokens[i]); 8794097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov if (tokens.hasDot(i)) { 8804097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov sb.append('.'); 8814097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov name.givenNames = sb.toString(); 8844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 8854097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov } 886635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 887635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 888635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Makes the best guess at the expected full name style based on the character set 889635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * used in the supplied name. If the phonetic name is also supplied, tries to 890635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * differentiate between Chinese, Japanese and Korean based on the alphabet used 891635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * for the phonetic name. 
892635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 893635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov public void guessNameStyle(Name name) { 894635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov guessFullNameStyle(name); 895635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov guessPhoneticNameStyle(name); 8965dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov name.fullNameStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(name.fullNameStyle, 8975dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov name.phoneticNameStyle); 8985dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov } 899635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 9005dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov /** 9015dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov * Updates the display name style according to the phonetic name style if we 9025dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov * were unsure about display name style based on the name components, but 9035dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov * phonetic name makes it more definitive. 
9045dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov */ 9055dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) { 9065dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov if (phoneticNameStyle != PhoneticNameStyle.UNDEFINED) { 9075dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov if (nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK) { 9085dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) { 9095dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov return FullNameStyle.JAPANESE; 9105dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov } else if (phoneticNameStyle == PhoneticNameStyle.KOREAN) { 9115dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov return FullNameStyle.KOREAN; 912635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 9135dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov if (nameStyle == FullNameStyle.CJK && phoneticNameStyle == PhoneticNameStyle.PINYIN) { 9145dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov return FullNameStyle.CHINESE; 915635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 916635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 917635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 9185dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov return nameStyle; 919635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 920635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 921635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov /** 922635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * Makes the best guess at the expected full name style based on the character set 923635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov * used in the supplied name. 
924635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov */ 925635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private void guessFullNameStyle(NameSplitter.Name name) { 9265dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov if (name.fullNameStyle != FullNameStyle.UNDEFINED) { 9275dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov return; 9285dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov } 9295dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov 930635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int bestGuess = guessFullNameStyle(name.givenNames); 9314cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao // A mix of Hanzi and latin chars are common in China, so we have to go through all names 9324cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao // if the name is not JANPANESE or KOREAN. 9334cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK 9344cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao && bestGuess != FullNameStyle.WESTERN) { 935635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = bestGuess; 936635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 937635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 938635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 939635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int guess = guessFullNameStyle(name.familyName); 940635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 9414cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { 942635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = guess; 943635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 944635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 945635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov bestGuess = guess; 
946635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 947635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 948635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov guess = guessFullNameStyle(name.middleName); 949635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 9504cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { 951635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = guess; 952635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 953635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 954635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov bestGuess = guess; 955635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 956635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 95755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov guess = guessFullNameStyle(name.prefix); 95855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 95955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { 96055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov name.fullNameStyle = guess; 96155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov return; 96255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 96355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov bestGuess = guess; 96455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 96555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov 96655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov guess = guessFullNameStyle(name.suffix); 96755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 96855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { 96955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri 
Plotnikov name.fullNameStyle = guess; 97055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov return; 97155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 97255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov bestGuess = guess; 97355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov } 97455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov 975635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.fullNameStyle = bestGuess; 976635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 977635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 9785dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov public int guessFullNameStyle(String name) { 979635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name == null) { 980635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.UNDEFINED; 981635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 982635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 983635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int nameStyle = FullNameStyle.UNDEFINED; 984635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = name.length(); 985635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int offset = 0; 986635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (offset < length) { 987635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int codePoint = Character.codePointAt(name, offset); 988635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (Character.isLetter(codePoint)) { 989635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); 990635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 9914cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao if (!isLatinUnicodeBlock(unicodeBlock)) { 992635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 9934cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao if (isCJKUnicodeBlock(unicodeBlock)) { 
9944cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao // We don't know if this is Chinese, Japanese or Korean - 9954cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao // trying to figure out by looking at other characters in the name 9964cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao return guessCJKNameStyle(name, offset + Character.charCount(codePoint)); 9974cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao } 998635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 9994cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { 10004cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao return FullNameStyle.JAPANESE; 10014cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao } 1002635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 10034cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao if (isKoreanUnicodeBlock(unicodeBlock)) { 10044cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao return FullNameStyle.KOREAN; 10054cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao } 1006635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 10074cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao nameStyle = FullNameStyle.WESTERN; 1008635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1009635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov offset += Character.charCount(codePoint); 1010635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1011635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return nameStyle; 1012635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1013635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1014635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private int guessCJKNameStyle(String name, int offset) { 1015635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = name.length(); 1016635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (offset < length) { 1017635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int codePoint = Character.codePointAt(name, offset); 
1018635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (Character.isLetter(codePoint)) { 1019635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); 1020635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { 1021635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.JAPANESE; 1022635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1023635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isKoreanUnicodeBlock(unicodeBlock)) { 1024635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.KOREAN; 1025635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1026635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1027635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov offset += Character.charCount(codePoint); 1028635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1029635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1030635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return FullNameStyle.CJK; 1031635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1032635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1033635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private void guessPhoneticNameStyle(NameSplitter.Name name) { 10345dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) { 10355dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov return; 10365dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov } 10375dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov 1038635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName); 1039635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) { 1040635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri 
Plotnikov name.phoneticNameStyle = bestGuess; 1041635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 1042635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1043635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1044635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int guess = guessPhoneticNameStyle(name.phoneticGivenName); 1045635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 1046635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.CJK) { 1047635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.phoneticNameStyle = guess; 1048635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 1049635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1050635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov bestGuess = guess; 1051635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1052635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1053635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov guess = guessPhoneticNameStyle(name.phoneticMiddleName); 1054635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.UNDEFINED) { 1055635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (guess != FullNameStyle.CJK) { 1056635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov name.phoneticNameStyle = guess; 1057635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return; 1058635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1059635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov bestGuess = guess; 1060635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1061635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1062635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 10635dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov public int guessPhoneticNameStyle(String name) { 1064635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (name == null) { 
1065635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return PhoneticNameStyle.UNDEFINED; 1066635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1067635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1068635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int nameStyle = PhoneticNameStyle.UNDEFINED; 1069635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int length = name.length(); 1070635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int offset = 0; 1071635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov while (offset < length) { 1072635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov int codePoint = Character.codePointAt(name, offset); 1073635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (Character.isLetter(codePoint)) { 1074635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); 1075635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { 1076635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return PhoneticNameStyle.JAPANESE; 1077635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1078635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isKoreanUnicodeBlock(unicodeBlock)) { 1079635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return PhoneticNameStyle.KOREAN; 1080635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1081635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov if (isLatinUnicodeBlock(unicodeBlock)) { 1082635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return PhoneticNameStyle.PINYIN; 1083635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1084635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1085635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov offset += Character.charCount(codePoint); 1086635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1087635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 
1088635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return nameStyle; 1089635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1090635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1091635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) { 1092635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return unicodeBlock == UnicodeBlock.BASIC_LATIN || 1093635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT || 1094635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A || 1095635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B || 1096635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL; 1097635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1098635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1099635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static boolean isCJKUnicodeBlock(UnicodeBlock block) { 1100635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS 1101635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1102635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1103635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION 1104635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT 1105635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_COMPATIBILITY 1106635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS 1107635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 
|| block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS 1108635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT; 1109635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1110635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1111635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) { 1112635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES || 1113635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.HANGUL_JAMO || 1114635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO; 1115635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 1116635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov 1117635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) { 1118635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov return unicodeBlock == UnicodeBlock.KATAKANA || 1119635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS || 1120635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS || 1121635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov unicodeBlock == UnicodeBlock.HIRAGANA; 1122635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov } 11234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov} 1124