/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts;
174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
import android.content.ContentValues;
import android.provider.ContactsContract.CommonDataKinds.StructuredName;
import android.provider.ContactsContract.FullNameStyle;
import android.provider.ContactsContract.PhoneticNameStyle;
import android.text.TextUtils;

import com.android.providers.contacts.util.NeededForTesting;

import java.lang.Character.UnicodeBlock;
import java.util.HashSet;
import java.util.Locale;
import java.util.StringTokenizer;
304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
/**
 * The purpose of this class is to split a full name into given names and last
 * name. The logic only supports having a single last name. If the full name has
 * multiple last names the output will be incorrect.
 * <p>
 * Core algorithm:
 * <ol>
 * <li>Remove the suffixes (III, Ph.D., M.D.).</li>
 * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li>
 * <li>Assign the last remaining token as the last name.</li>
 * <li>If the word preceding the last name is one of LASTNAME_PREFIXES, make
 * that word part of the last name as well.</li>
 * <li>Assign the rest of the words as the "given names".</li>
 * </ol>
 */
464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovpublic class NameSplitter {
474097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
48f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov    public static final int MAX_TOKENS = 10;
49f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
50635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
51635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase();
52635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
53635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    // This includes simplified and traditional Chinese
54635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();
55635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private final HashSet<String> mPrefixesSet;
574097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private final HashSet<String> mSuffixesSet;
584097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private final int mMaxSuffixLength;
594097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private final HashSet<String> mLastNamePrefixesSet;
604097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private final HashSet<String> mConjuctions;
61622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey    private final Locale mLocale;
62635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private final String mLanguage;
634097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
6456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    /**
6556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * Two-Chracter long Korean family names.
6656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * http://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EB%B3%B5%EC%84%B1
6756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     */
6856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    private static final String[] KOREAN_TWO_CHARCTER_FAMILY_NAMES = {
6956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uAC15\uC804", // Gang Jeon
7056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uB0A8\uAD81", // Nam Goong
7156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uB3C5\uACE0", // Dok Go
7256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uB3D9\uBC29", // Dong Bang
7356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uB9DD\uC808", // Mang Jeol
7456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uC0AC\uACF5", // Sa Gong
7556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uC11C\uBB38", // Seo Moon
7656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uC120\uC6B0", // Seon Woo
7756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uC18C\uBD09", // So Bong
7856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uC5B4\uAE08", // Uh Geum
7956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uC7A5\uACE1", // Jang Gok
8056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uC81C\uAC08", // Je Gal
8156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        "\uD669\uBCF4"  // Hwang Bo
8256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    };
8356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    public static class Name {
85635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String prefix;
86635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String givenNames;
87635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String middleName;
88635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String familyName;
89635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String suffix;
90635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
91635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public int fullNameStyle;
92635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
93635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String phoneticFamilyName;
94635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String phoneticMiddleName;
95635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String phoneticGivenName;
96635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
97635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public int phoneticNameStyle;
984097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
99622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        public Name() {
100622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        }
101622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey
102622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        public Name(String prefix, String givenNames, String middleName, String familyName,
103622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey                String suffix) {
104622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.prefix = prefix;
105622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.givenNames = givenNames;
106622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.middleName = middleName;
107622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.familyName = familyName;
108622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.suffix = suffix;
109622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        }
110622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey
11110178e5e0b9de566e04508b624a89860c61787d6Makoto Onuki        @NeededForTesting
1124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getPrefix() {
1134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return prefix;
1144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
1154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
1164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getGivenNames() {
1174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return givenNames;
1184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
1194097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
1204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getMiddleName() {
1214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return middleName;
1224097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
1234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
1244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getFamilyName() {
1254097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return familyName;
1264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
1274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
12810178e5e0b9de566e04508b624a89860c61787d6Makoto Onuki        @NeededForTesting
1294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getSuffix() {
1304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return suffix;
1314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
132622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey
133ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public int getFullNameStyle() {
134ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return fullNameStyle;
135ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
136ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
137ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public String getPhoneticFamilyName() {
138ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return phoneticFamilyName;
139ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
140ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
141ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public String getPhoneticMiddleName() {
142ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return phoneticMiddleName;
143ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
144ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
145ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public String getPhoneticGivenName() {
146ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return phoneticGivenName;
147ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
148ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
149ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public int getPhoneticNameStyle() {
150ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return phoneticNameStyle;
151ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
152ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
153622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        public void fromValues(ContentValues values) {
154622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            prefix = values.getAsString(StructuredName.PREFIX);
155622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            givenNames = values.getAsString(StructuredName.GIVEN_NAME);
156622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            middleName = values.getAsString(StructuredName.MIDDLE_NAME);
157622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            familyName = values.getAsString(StructuredName.FAMILY_NAME);
158622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            suffix = values.getAsString(StructuredName.SUFFIX);
159635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
160635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
161635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer;
162635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
163635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
164635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
165635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);
166635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
167635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
168635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer;
169622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        }
170622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey
171622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        public void toValues(ContentValues values) {
172635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.PREFIX, prefix);
173635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
174635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
175635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
176635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.SUFFIX, suffix);
177635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
178635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
179635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
180635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
181635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
182635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
183635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
184635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        private void putValueIfPresent(ContentValues values, String name, String value) {
185635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (value != null) {
186635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                values.put(name, value);
187635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
188635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
189635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
190635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public void clear() {
191635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            prefix = null;
192635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            givenNames = null;
193635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            middleName = null;
194635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            familyName = null;
195635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            suffix = null;
196635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            fullNameStyle = FullNameStyle.UNDEFINED;
197635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticFamilyName = null;
198635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticMiddleName = null;
199635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticGivenName = null;
200635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
201622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        }
2025dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
2035dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        public boolean isEmpty() {
2045dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov            return TextUtils.isEmpty(givenNames)
2055dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(middleName)
2065dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(familyName)
2075dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(suffix)
2085dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(phoneticFamilyName)
2095dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(phoneticMiddleName)
2105dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(phoneticGivenName);
2115dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        }
2125dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
2135dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        @Override
2145dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        public String toString() {
21517a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov            return "[prefix: " + prefix + " given: " + givenNames + " middle: " + middleName
21617a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov                    + " family: " + familyName + " suffix: " + suffix + " ph/given: "
21717a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov                    + phoneticGivenName + " ph/middle: " + phoneticMiddleName + " ph/family: "
21817a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov                    + phoneticFamilyName + "]";
2195dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        }
2204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
2214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2224097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private static class NameTokenizer extends StringTokenizer {
2234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        private final String[] mTokens;
2244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        private int mDotBitmask;
225635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        private int mCommaBitmask;
2264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        private int mStartPointer;
2274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        private int mEndPointer;
2284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public NameTokenizer(String fullName) {
2304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            super(fullName, " .,", true);
2314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            mTokens = new String[MAX_TOKENS];
2334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            // Iterate over tokens, skipping over empty ones and marking tokens that
2354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            // are followed by dots.
2364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
2374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                final String token = nextToken();
2384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (token.length() > 0) {
2394097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    final char c = token.charAt(0);
240635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    if (c == ' ') {
2414097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                        continue;
2424097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    }
2434097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
2444097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2454097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (mEndPointer > 0 && token.charAt(0) == '.') {
2464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    mDotBitmask |= (1 << (mEndPointer - 1));
247635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                } else if (mEndPointer > 0 && token.charAt(0) == ',') {
248635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    mCommaBitmask |= (1 << (mEndPointer - 1));
2494097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                } else {
2504097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    mTokens[mEndPointer] = token;
2514097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    mEndPointer++;
2524097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
2534097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
2544097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
2554097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        /**
2574097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov         * Returns true if the token is followed by a dot in the original full name.
2584097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov         */
2594097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public boolean hasDot(int index) {
2604097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return (mDotBitmask & (1 << index)) != 0;
2614097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
262635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
263635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        /**
264635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov         * Returns true if the token is followed by a comma in the original full name.
265635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov         */
266635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public boolean hasComma(int index) {
267635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return (mCommaBitmask & (1 << index)) != 0;
268635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
2694097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
2704097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2714097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
2724097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Constructor.
2734097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     *
2744097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * @param commonPrefixes comma-separated list of common prefixes,
2754097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     *            e.g. "Mr, Ms, Mrs"
2764097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
277635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *            e.g. "d', st, st., von"
2784097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * @param commonSuffixes comma-separated list of common suffixes,
2794097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     *            e.g. "Jr, M.D., MD, D.D.S."
2804097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * @param commonConjunctions comma-separated list of common conjuctions,
2814097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     *            e.g. "AND, Or"
2824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
2834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
284622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            String commonSuffixes, String commonConjunctions, Locale locale) {
285622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        // TODO: refactor this to use <string-array> resources
2864097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mPrefixesSet = convertToSet(commonPrefixes);
2874097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
2884097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mSuffixesSet = convertToSet(commonSuffixes);
2894097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mConjuctions = convertToSet(commonConjunctions);
290635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        mLocale = locale != null ? locale : Locale.getDefault();
291635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        mLanguage = mLocale.getLanguage().toLowerCase();
2924097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2934097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        int maxLength = 0;
2944097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        for (String suffix : mSuffixesSet) {
2954097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if (suffix.length() > maxLength) {
2964097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                maxLength = suffix.length();
2974097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
2984097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
2994097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
3004097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mMaxSuffixLength = maxLength;
3014097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
3024097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
3034097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
3044097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Converts a comma-separated list of Strings to a set of Strings. Trims strings
3054097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * and converts them to upper case.
3064097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
3074097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private static HashSet<String> convertToSet(String strings) {
3084097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        HashSet<String> set = new HashSet<String>();
3094097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (strings != null) {
3104097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            String[] split = strings.split(",");
3114097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            for (int i = 0; i < split.length; i++) {
3124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                set.add(split[i].trim().toUpperCase());
3134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
3144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
3154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        return set;
3164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
3174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
3184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
319f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov     * Parses a full name and returns components as a list of tokens.
320f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov     */
321f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov    public int tokenize(String[] tokens, String fullName) {
322f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        if (fullName == null) {
323f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov            return 0;
324f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        }
325f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
326f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        NameTokenizer tokenizer = new NameTokenizer(fullName);
327f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
328f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        if (tokenizer.mStartPointer == tokenizer.mEndPointer) {
329f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov            return 0;
330f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        }
331f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
332f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        String firstToken = tokenizer.mTokens[tokenizer.mStartPointer];
333f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        int count = 0;
334f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) {
335f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov            tokens[count++] = tokenizer.mTokens[i];
336f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        }
337f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
338f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        return count;
339f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov    }
340f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
341f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
342f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov    /**
3434097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Parses a full name and returns parsed components in the Name object.
3444097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
3454097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    public void split(Name name, String fullName) {
3464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (fullName == null) {
3474097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
3484097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
3494097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
350635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int fullNameStyle = guessFullNameStyle(fullName);
351635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (fullNameStyle == FullNameStyle.CJK) {
352635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            fullNameStyle = getAdjustedFullNameStyle(fullNameStyle);
353635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
354635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
35556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        split(name, fullName, fullNameStyle);
35656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    }
35756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
35856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    /**
35956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * Parses a full name and returns parsed components in the Name object
36056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * with a given fullNameStyle.
36156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     */
36256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    public void split(Name name, String fullName, int fullNameStyle) {
36356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        if (fullName == null) {
36456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            return;
36556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        }
36656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
3675dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        name.fullNameStyle = fullNameStyle;
3685dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
369635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        switch (fullNameStyle) {
370635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.CHINESE:
371635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                splitChineseName(name, fullName);
372635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                break;
373635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
374635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.JAPANESE:
37556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                splitJapaneseName(name, fullName);
37656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                break;
37756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
378635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.KOREAN:
37956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                splitKoreanName(name, fullName);
380635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                break;
381635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
382635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            default:
383635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                splitWesternName(name, fullName);
384635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
385635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
386635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
387635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
388635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Splits a full name composed according to the Western tradition:
389635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * <pre>
390635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *   [prefix] given name(s) [[middle name] family name] [, suffix]
391635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *   [prefix] family name, given name [middle name] [,suffix]
392635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * </pre>
393635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
394635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private void splitWesternName(Name name, String fullName) {
3954097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        NameTokenizer tokens = new NameTokenizer(fullName);
3964097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        parsePrefix(name, tokens);
397c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov
398c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov        // If the name consists of just one or two tokens, treat them as first/last name,
399c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov        // not as suffix.  Example: John Ma; Ma is last name, not "M.A.".
400c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov        if (tokens.mEndPointer > 2) {
401c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov            parseSuffix(name, tokens);
402c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov        }
403c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov
404c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov        if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) {
405c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov            name.givenNames = tokens.mTokens[tokens.mStartPointer];
406c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov        } else {
407c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov            parseLastName(name, tokens);
408c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov            parseMiddleName(name, tokens);
409c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov            parseGivenNames(name, tokens);
410c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov        }
4114097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
4124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
4134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
414635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Splits a full name composed according to the Chinese tradition:
415635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * <pre>
416635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *   [family name [middle name]] given name
417635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * </pre>
418635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
419635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private void splitChineseName(Name name, String fullName) {
420635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        StringTokenizer tokenizer = new StringTokenizer(fullName);
421635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (tokenizer.hasMoreTokens()) {
422635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            String token = tokenizer.nextToken();
423635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (name.givenNames == null) {
424635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
425635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (name.familyName == null) {
426635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = name.givenNames;
427635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
428635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (name.middleName == null) {
429635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.middleName = name.givenNames;
430635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
431635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
432635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.middleName = name.middleName + name.givenNames;
433635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
434635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
435635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
436635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
437635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // If a single word parse that word up.
438635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (name.givenNames != null && name.familyName == null && name.middleName == null) {
439635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            int length = fullName.length();
440635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (length == 2) {
441635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = fullName.substring(0, 1);
442635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = fullName.substring(1);
443635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (length == 3) {
444635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = fullName.substring(0, 1);
445635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.middleName = fullName.substring(1, 2);
446635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = fullName.substring(2);
447635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (length == 4) {
448635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = fullName.substring(0, 2);
449635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.middleName = fullName.substring(2, 3);
450635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = fullName.substring(3);
451635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
452635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
453635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
454635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
455635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
456635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
457635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Splits a full name composed according to the Japanese tradition:
458635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * <pre>
459635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *   [family name] given name(s)
460635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * </pre>
461635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
46256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    private void splitJapaneseName(Name name, String fullName) {
463635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        StringTokenizer tokenizer = new StringTokenizer(fullName);
464635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (tokenizer.hasMoreTokens()) {
465635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            String token = tokenizer.nextToken();
466635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (name.givenNames == null) {
467635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
468635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (name.familyName == null) {
469635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = name.givenNames;
470635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
471635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
472635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames += " " + token;
473635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
474635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
475635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
476635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
477635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
47856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * Splits a full name composed according to the Korean tradition:
47956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * <pre>
48056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     *   [family name] given name(s)
48156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * </pre>
48256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     */
48356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    private void splitKoreanName(Name name, String fullName) {
48456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        StringTokenizer tokenizer = new StringTokenizer(fullName);
48556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        if (tokenizer.countTokens() > 1) {
48656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            // Each name can be identified by separators.
48756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            while (tokenizer.hasMoreTokens()) {
48856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                String token = tokenizer.nextToken();
48956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                if (name.givenNames == null) {
49056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    name.givenNames = token;
49156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                } else if (name.familyName == null) {
49256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    name.familyName = name.givenNames;
49356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    name.givenNames = token;
49456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                } else {
49556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    name.givenNames += " " + token;
49656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                }
49756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            }
49856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        } else {
49956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            // There is no separator. Try to guess family name.
50056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            // The length of most family names is 1.
50156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            int familyNameLength = 1;
50256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
50356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            // Compare with 2-length family names.
50456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            for (String twoLengthFamilyName : KOREAN_TWO_CHARCTER_FAMILY_NAMES) {
50556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                if (fullName.startsWith(twoLengthFamilyName)) {
50656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    familyNameLength = 2;
50756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    break;
50856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                }
50956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            }
51056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
51156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            name.familyName = fullName.substring(0, familyNameLength);
51256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            if (fullName.length() > familyNameLength) {
51356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                name.givenNames = fullName.substring(familyNameLength);
51456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            }
51556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        }
51656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    }
51756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
51856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    /**
519635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Concatenates components of a name according to the rules dictated by the name style.
520635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *
521635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * @param givenNameFirst is ignored for CJK display name styles
522635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
52355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov    public String join(Name name, boolean givenNameFirst, boolean includePrefix) {
52455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        String prefix = includePrefix ? name.prefix : null;
525635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        switch (name.fullNameStyle) {
526cdd03b2ba03718a7fa85663a2438136284a1557cBai Tao            case FullNameStyle.CJK:
527635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.CHINESE:
528635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.KOREAN:
52955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                return join(prefix, name.familyName, name.middleName, name.givenNames,
53055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                        name.suffix, false, false, false);
531635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
532635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.JAPANESE:
53355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                return join(prefix, name.familyName, name.middleName, name.givenNames,
53455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                        name.suffix, true, false, false);
535635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
536635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            default:
537635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (givenNameFirst) {
53855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                    return join(prefix, name.givenNames, name.middleName, name.familyName,
53955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                            name.suffix, true, false, true);
540635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                } else {
54155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                    return join(prefix, name.familyName, name.givenNames, name.middleName,
54255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                            name.suffix, true, true, true);
543635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
544635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
545635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
546635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
547635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
5485dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * Concatenates components of the phonetic name following the CJK tradition:
5495dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * family name + middle name + given name(s).
5505dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     */
5515dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    public String joinPhoneticName(Name name) {
55255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        return join(null, name.phoneticFamilyName,
55355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                name.phoneticMiddleName, name.phoneticGivenName, null, true, false, false);
5545dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    }
5555dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
5565dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    /**
557635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Concatenates parts of a full name inserting spaces and commas as specified.
558635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
55955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov    private String join(String prefix, String part1, String part2, String part3, String suffix,
560635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) {
56155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        prefix = prefix == null ? null: prefix.trim();
562dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov        part1 = part1 == null ? null: part1.trim();
563dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov        part2 = part2 == null ? null: part2.trim();
564dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov        part3 = part3 == null ? null: part3.trim();
565dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov        suffix = suffix == null ? null: suffix.trim();
566dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov
56755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        boolean hasPrefix = !TextUtils.isEmpty(prefix);
568635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean hasPart1 = !TextUtils.isEmpty(part1);
569635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean hasPart2 = !TextUtils.isEmpty(part2);
570635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean hasPart3 = !TextUtils.isEmpty(part3);
571635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean hasSuffix = !TextUtils.isEmpty(suffix);
572635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
573635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean isSingleWord = true;
574635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        String singleWord = null;
57555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
57655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (hasPrefix) {
57755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            singleWord = prefix;
57855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
57955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
580635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart1) {
58155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (singleWord != null) {
58255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                isSingleWord = false;
58355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            } else {
58455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                singleWord = part1;
58555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
586635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
587635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
588635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart2) {
589635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (singleWord != null) {
590635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                isSingleWord = false;
591635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
592635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                singleWord = part2;
593635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
594635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
595635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
596635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart3) {
597635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (singleWord != null) {
598635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                isSingleWord = false;
599635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
600635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                singleWord = part3;
601635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
602635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
603635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
604635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasSuffix) {
605635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (singleWord != null) {
606635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                isSingleWord = false;
607635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
608635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                singleWord = normalizedSuffix(suffix);
609635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
610635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
611635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
612635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (isSingleWord) {
613635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return singleWord;
614635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
615635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
616635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        StringBuilder sb = new StringBuilder();
61755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
61855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (hasPrefix) {
61955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            sb.append(prefix);
62055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
62155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
622635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart1) {
62355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (hasPrefix) {
62455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                sb.append(' ');
62555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
626635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            sb.append(part1);
627635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
628635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
629635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart2) {
63055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (hasPrefix || hasPart1) {
631635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useCommaAfterPart1) {
632635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(',');
633635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
634635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useSpace) {
635635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(' ');
636635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
637635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
638635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            sb.append(part2);
639635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
640635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
641635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart3) {
64255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (hasPrefix || hasPart1 || hasPart2) {
643635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useSpace) {
644635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(' ');
645635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
646635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
647635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            sb.append(part3);
648635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
649635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
650635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasSuffix) {
65155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (hasPrefix || hasPart1 || hasPart2 || hasPart3) {
652635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useCommaAfterPart3) {
653635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(',');
654635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
655635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useSpace) {
656635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(' ');
657635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
658635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
659635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            sb.append(normalizedSuffix(suffix));
660635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
661635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
662635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return sb.toString();
663635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
664635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
665635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
666635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Puts a dot after the supplied suffix if that is the accepted form of the suffix,
667635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * e.g. "Jr." and "Sr.", but not "I", "II" and "III".
668635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
669635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private String normalizedSuffix(String suffix) {
670635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int length = suffix.length();
671635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (length == 0 || suffix.charAt(length - 1) == '.') {
672635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return suffix;
673635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
674635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
675635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        String withDot = suffix + '.';
676635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (mSuffixesSet.contains(withDot.toUpperCase())) {
677635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return withDot;
678635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        } else {
679635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return suffix;
680635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
681635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
682635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
683635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
684635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * If the supplied name style is undefined, returns a default based on the language,
685635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * otherwise returns the supplied name style itself.
686635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *
687635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * @param nameStyle See {@link FullNameStyle}.
688635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
689635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    public int getAdjustedFullNameStyle(int nameStyle) {
690635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (nameStyle == FullNameStyle.UNDEFINED) {
691635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (JAPANESE_LANGUAGE.equals(mLanguage)) {
692635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.JAPANESE;
693635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
694635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.KOREAN;
695635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (CHINESE_LANGUAGE.equals(mLanguage)) {
696635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.CHINESE;
697635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
698635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.WESTERN;
699635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
700635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        } else if (nameStyle == FullNameStyle.CJK) {
701635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (JAPANESE_LANGUAGE.equals(mLanguage)) {
702635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.JAPANESE;
703635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
704635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.KOREAN;
705635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
706635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.CHINESE;
707635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
708635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
709635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return nameStyle;
710635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
711635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
712635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
7134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Parses the first word from the name if it is a prefix.
7144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
7154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parsePrefix(Name name, NameTokenizer tokens) {
7164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
7174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7194097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        String firstToken = tokens.mTokens[tokens.mStartPointer];
7214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (mPrefixesSet.contains(firstToken.toUpperCase())) {
72217a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov            if (tokens.hasDot(tokens.mStartPointer)) {
72317a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov                firstToken += '.';
72417a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov            }
7254097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            name.prefix = firstToken;
7264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            tokens.mStartPointer++;
7274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
7294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
7314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Parses the last word(s) from the name if it is a suffix.
7324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
7334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parseSuffix(Name name, NameTokenizer tokens) {
7344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
7354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        String lastToken = tokens.mTokens[tokens.mEndPointer - 1];
73955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
74055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        // Take care of an explicit comma-separated suffix
74155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (tokens.mEndPointer - tokens.mStartPointer > 2
74255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                && tokens.hasComma(tokens.mEndPointer - 2)) {
74355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (tokens.hasDot(tokens.mEndPointer - 1)) {
74455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                lastToken += '.';
74555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
74655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            name.suffix = lastToken;
74755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            tokens.mEndPointer--;
74855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            return;
74955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
75055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
7514097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (lastToken.length() > mMaxSuffixLength) {
7524097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7534097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7544097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7554097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        String normalized = lastToken.toUpperCase();
7564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (mSuffixesSet.contains(normalized)) {
7574097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            name.suffix = lastToken;
7584097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            tokens.mEndPointer--;
7594097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7604097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7614097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7624097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.hasDot(tokens.mEndPointer - 1)) {
7634097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            lastToken += '.';
7644097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7654097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        normalized += ".";
7664097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7674097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        // Take care of suffixes like M.D. and D.D.S.
7684097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        int pos = tokens.mEndPointer - 1;
7694097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        while (normalized.length() <= mMaxSuffixLength) {
7704097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7714097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if (mSuffixesSet.contains(normalized)) {
7724097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                name.suffix = lastToken;
7734097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                tokens.mEndPointer = pos;
7744097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                return;
7754097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
7764097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7774097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if (pos == tokens.mStartPointer) {
7784097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                break;
7794097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
7804097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7814097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            pos--;
7824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if (tokens.hasDot(pos)) {
7834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                lastToken = tokens.mTokens[pos] + "." + lastToken;
7844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            } else {
7854097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                lastToken = tokens.mTokens[pos] + " " + lastToken;
7864097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
7874097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7884097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized;
7894097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7904097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
7914097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7924097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parseLastName(Name name, NameTokenizer tokens) {
7934097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
7944097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7954097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7964097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
797635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // If the first word is followed by a comma, assume that it's the family name
798635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (tokens.hasComma(tokens.mStartPointer)) {
799635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov           name.familyName = tokens.mTokens[tokens.mStartPointer];
800635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov           tokens.mStartPointer++;
801635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov           return;
802635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
803635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
804635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // If the second word is followed by a comma and the first word
805635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // is a last name prefix as in "de Sade" and "von Cliburn", treat
806635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // the first two words as the family name.
807635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (tokens.mStartPointer + 1 < tokens.mEndPointer
808635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                && tokens.hasComma(tokens.mStartPointer + 1)
809635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) {
810635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            String familyNamePrefix = tokens.mTokens[tokens.mStartPointer];
811635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (tokens.hasDot(tokens.mStartPointer)) {
812635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                familyNamePrefix += '.';
813635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
814635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1];
815635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            tokens.mStartPointer += 2;
816635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return;
817635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
818635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
819635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // Finally, assume that the last word is the last name
8204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        name.familyName = tokens.mTokens[tokens.mEndPointer - 1];
8214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        tokens.mEndPointer--;
8224097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
823635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // Take care of last names like "de Sade" and "von Cliburn"
8244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if ((tokens.mEndPointer - tokens.mStartPointer) > 0) {
8254097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1];
826635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (isFamilyNamePrefix(lastNamePrefix)) {
8274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (tokens.hasDot(tokens.mEndPointer - 1)) {
8284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    lastNamePrefix += '.';
8294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
8304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                name.familyName = lastNamePrefix + " " + name.familyName;
8314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                tokens.mEndPointer--;
8324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
8334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
8354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
836635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
837635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de"
838635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
839635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private boolean isFamilyNamePrefix(String word) {
840635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        final String normalized = word.toUpperCase();
841635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
842635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return mLastNamePrefixesSet.contains(normalized)
843635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || mLastNamePrefixesSet.contains(normalized + ".");
844635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
845635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
8464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
8474097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parseMiddleName(Name name, NameTokenizer tokens) {
8484097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
8494097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
8504097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8514097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
8524097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if ((tokens.mEndPointer - tokens.mStartPointer) > 1) {
8534097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if ((tokens.mEndPointer - tokens.mStartPointer) == 2
8544097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2].
8554097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                            toUpperCase())) {
8564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                name.middleName = tokens.mTokens[tokens.mEndPointer - 1];
857635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (tokens.hasDot(tokens.mEndPointer - 1)) {
858635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    name.middleName += '.';
859635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
8604097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                tokens.mEndPointer--;
8614097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
8624097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8634097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
8644097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
8654097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parseGivenNames(Name name, NameTokenizer tokens) {
8664097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
8674097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
8684097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8694097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
8704097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if ((tokens.mEndPointer - tokens.mStartPointer) == 1) {
8714097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            name.givenNames = tokens.mTokens[tokens.mStartPointer];
8724097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        } else {
8734097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            StringBuilder sb = new StringBuilder();
8744097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) {
8754097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (i != tokens.mStartPointer) {
8764097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    sb.append(' ');
8774097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
8784097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                sb.append(tokens.mTokens[i]);
8794097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (tokens.hasDot(i)) {
8804097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    sb.append('.');
8814097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
8824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
8834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            name.givenNames = sb.toString();
8844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8854097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
886635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
887635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
888635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Makes the best guess at the expected full name style based on the character set
889635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * used in the supplied name.  If the phonetic name is also supplied, tries to
890635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * differentiate between Chinese, Japanese and Korean based on the alphabet used
891635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * for the phonetic name.
892635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
893635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    public void guessNameStyle(Name name) {
894635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        guessFullNameStyle(name);
895635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        guessPhoneticNameStyle(name);
8965dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        name.fullNameStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(name.fullNameStyle,
8975dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                name.phoneticNameStyle);
8985dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    }
899635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
9005dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    /**
9015dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * Updates the display name style according to the phonetic name style if we
9025dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * were unsure about display name style based on the name components, but
9035dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * phonetic name makes it more definitive.
9045dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     */
9055dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) {
9065dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        if (phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
9075dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov            if (nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK) {
9085dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) {
9095dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    return FullNameStyle.JAPANESE;
9105dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                } else if (phoneticNameStyle == PhoneticNameStyle.KOREAN) {
9115dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    return FullNameStyle.KOREAN;
912635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
9135dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                if (nameStyle == FullNameStyle.CJK && phoneticNameStyle == PhoneticNameStyle.PINYIN) {
9145dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    return FullNameStyle.CHINESE;
915635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
916635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
917635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
9185dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        return nameStyle;
919635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
920635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
921635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
922635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Makes the best guess at the expected full name style based on the character set
923635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * used in the supplied name.
924635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
925635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private void guessFullNameStyle(NameSplitter.Name name) {
9265dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        if (name.fullNameStyle != FullNameStyle.UNDEFINED) {
9275dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov            return;
9285dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        }
9295dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
930635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int bestGuess = guessFullNameStyle(name.givenNames);
9314cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao        // A mix of Hanzi and latin chars are common in China, so we have to go through all names
9324cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao        // if the name is not JANPANESE or KOREAN.
9334cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao        if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK
9344cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                && bestGuess != FullNameStyle.WESTERN) {
935635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            name.fullNameStyle = bestGuess;
936635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return;
937635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
938635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
939635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int guess = guessFullNameStyle(name.familyName);
940635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
9414cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
942635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.fullNameStyle = guess;
943635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return;
944635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
945635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            bestGuess = guess;
946635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
947635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
948635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        guess = guessFullNameStyle(name.middleName);
949635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
9504cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
951635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.fullNameStyle = guess;
952635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return;
953635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
954635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            bestGuess = guess;
955635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
956635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
95755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        guess = guessFullNameStyle(name.prefix);
95855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
95955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
96055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                name.fullNameStyle = guess;
96155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                return;
96255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
96355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            bestGuess = guess;
96455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
96555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
96655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        guess = guessFullNameStyle(name.suffix);
96755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
96855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
96955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                name.fullNameStyle = guess;
97055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                return;
97155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
97255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            bestGuess = guess;
97355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
97455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
975635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        name.fullNameStyle = bestGuess;
976635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
977635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
9785dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    public int guessFullNameStyle(String name) {
979635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (name == null) {
980635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return FullNameStyle.UNDEFINED;
981635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
982635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
983635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int nameStyle = FullNameStyle.UNDEFINED;
984635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int length = name.length();
985635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int offset = 0;
986635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (offset < length) {
987635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            int codePoint = Character.codePointAt(name, offset);
988635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (Character.isLetter(codePoint)) {
989635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
990635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
9914cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                if (!isLatinUnicodeBlock(unicodeBlock)) {
992635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
9934cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    if (isCJKUnicodeBlock(unicodeBlock)) {
9944cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        // We don't know if this is Chinese, Japanese or Korean -
9954cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        // trying to figure out by looking at other characters in the name
9964cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        return guessCJKNameStyle(name, offset + Character.charCount(codePoint));
9974cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    }
998635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
9994cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
10004cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        return FullNameStyle.JAPANESE;
10014cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    }
1002635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
10034cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    if (isKoreanUnicodeBlock(unicodeBlock)) {
10044cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        return FullNameStyle.KOREAN;
10054cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    }
1006635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
10074cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                nameStyle = FullNameStyle.WESTERN;
1008635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1009635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            offset += Character.charCount(codePoint);
1010635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1011635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return nameStyle;
1012635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1013635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1014635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private int guessCJKNameStyle(String name, int offset) {
1015635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int length = name.length();
1016635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (offset < length) {
1017635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            int codePoint = Character.codePointAt(name, offset);
1018635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (Character.isLetter(codePoint)) {
1019635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
1020635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
1021635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return FullNameStyle.JAPANESE;
1022635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1023635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isKoreanUnicodeBlock(unicodeBlock)) {
1024635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return FullNameStyle.KOREAN;
1025635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1026635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1027635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            offset += Character.charCount(codePoint);
1028635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1029635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1030635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return FullNameStyle.CJK;
1031635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1032635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1033635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private void guessPhoneticNameStyle(NameSplitter.Name name) {
10345dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
10355dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov            return;
10365dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        }
10375dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
1038635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName);
1039635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) {
1040635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            name.phoneticNameStyle = bestGuess;
1041635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return;
1042635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1043635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1044635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int guess = guessPhoneticNameStyle(name.phoneticGivenName);
1045635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
1046635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (guess != FullNameStyle.CJK) {
1047635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.phoneticNameStyle = guess;
1048635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return;
1049635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1050635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            bestGuess = guess;
1051635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1052635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1053635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        guess = guessPhoneticNameStyle(name.phoneticMiddleName);
1054635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
1055635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (guess != FullNameStyle.CJK) {
1056635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.phoneticNameStyle = guess;
1057635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return;
1058635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1059635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            bestGuess = guess;
1060635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1061635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1062635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
10635dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    public int guessPhoneticNameStyle(String name) {
1064635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (name == null) {
1065635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return PhoneticNameStyle.UNDEFINED;
1066635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1067635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1068635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int nameStyle = PhoneticNameStyle.UNDEFINED;
1069635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int length = name.length();
1070635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int offset = 0;
1071635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (offset < length) {
1072635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            int codePoint = Character.codePointAt(name, offset);
1073635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (Character.isLetter(codePoint)) {
1074635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
1075635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
1076635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return PhoneticNameStyle.JAPANESE;
1077635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1078635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isKoreanUnicodeBlock(unicodeBlock)) {
1079635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return PhoneticNameStyle.KOREAN;
1080635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1081635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isLatinUnicodeBlock(unicodeBlock)) {
1082635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return PhoneticNameStyle.PINYIN;
1083635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1084635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1085635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            offset += Character.charCount(codePoint);
1086635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1087635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1088635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return nameStyle;
1089635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1090635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1091635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
1092635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return unicodeBlock == UnicodeBlock.BASIC_LATIN ||
1093635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT ||
1094635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A ||
1095635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B ||
1096635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
1097635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1098635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1099635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static boolean isCJKUnicodeBlock(UnicodeBlock block) {
1100635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
1101635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1102635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1103635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
1104635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT
1105635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_COMPATIBILITY
1106635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
1107635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
1108635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
1109635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1110635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1111635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) {
1112635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES ||
1113635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.HANGUL_JAMO ||
1114635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
1115635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1116635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1117635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) {
1118635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return unicodeBlock == UnicodeBlock.KATAKANA ||
1119635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS ||
1120635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS ||
1121635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.HIRAGANA;
1122635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
11234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov}
1124