14097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov/*
24097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Copyright (C) 2009 The Android Open Source Project
34097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov *
44097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License");
54097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * you may not use this file except in compliance with the License.
64097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * You may obtain a copy of the License at
74097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov *
84097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov *      http://www.apache.org/licenses/LICENSE-2.0
94097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov *
104097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Unless required by applicable law or agreed to in writing, software
114097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS,
124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * See the License for the specific language governing permissions and
144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * limitations under the License
154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */
1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts;
174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
18622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkeyimport android.content.ContentValues;
1938210445730ee04c351c7cc1b3800cfe23e34325Makoto Onukiimport android.provider.ContactsContract.CommonDataKinds.StructuredName;
20635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport android.provider.ContactsContract.FullNameStyle;
21635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport android.provider.ContactsContract.PhoneticNameStyle;
22f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikovimport android.text.TextUtils;
23622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey
2438210445730ee04c351c7cc1b3800cfe23e34325Makoto Onukiimport com.android.providers.contacts.util.NeededForTesting;
2538210445730ee04c351c7cc1b3800cfe23e34325Makoto Onuki
26635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikovimport java.lang.Character.UnicodeBlock;
274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovimport java.util.HashSet;
28622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkeyimport java.util.Locale;
294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovimport java.util.StringTokenizer;
304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov/**
324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * The purpose of this class is to split a full name into given names and last
334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * name. The logic only supports having a single last name. If the full name has
344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * multiple last names the output will be incorrect.
354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <p>
364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Core algorithm:
374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <ol>
384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Remove the suffixes (III, Ph.D., M.D.).</li>
394097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li>
404097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Assign the last remaining token as the last name.</li>
414097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>If the previous word to the last name is one from LASTNAME_PREFIXES, use
424097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * this word also as the last name.</li>
434097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * <li>Assign the rest of the words as the "given names".</li>
444097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * </ol>
454097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */
464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovpublic class NameSplitter {
474097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
    /**
     * Capacity of the token array allocated by {@code NameTokenizer}; tokenizing
     * stops as soon as this many tokens have been stored.
     */
    public static final int MAX_TOKENS = 10;

    // Lower-cased language codes of the Japanese and Korean locales, computed once.
    // NOTE(review): presumably compared against mLanguage outside this section - confirm.
    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
    private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase();

    // This includes simplified and traditional Chinese
    private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();

    // The four sets below are built by convertToSet() in the constructor, so their
    // entries are trimmed and upper-cased.

    /** Common name prefixes; tokenize() drops a leading token found in this set. */
    private final HashSet<String> mPrefixesSet;
    /** Common name suffixes. */
    private final HashSet<String> mSuffixesSet;
    /** Length of the longest entry in {@link #mSuffixesSet} (0 when the set is empty). */
    private final int mMaxSuffixLength;
    /** Common last-name prefixes. */
    private final HashSet<String> mLastNamePrefixesSet;
    /** Common conjunctions. */
    private final HashSet<String> mConjuctions;
    /** Locale passed to the constructor, or the default locale when null was passed. */
    private final Locale mLocale;
    /** Lower-cased language code of {@link #mLocale}. */
    private final String mLanguage;
634097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
    /**
     * Two-character-long Korean family names, written as Unicode escapes with the
     * romanized form in the trailing comment of each entry.
     * http://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EB%B3%B5%EC%84%B1
     * <p>
     * NOTE(review): the code that consults this table is outside this section;
     * presumably it is used when splitting Korean full names - confirm.
     */
    private static final String[] KOREAN_TWO_CHARCTER_FAMILY_NAMES = {
        "\uAC15\uC804", // Gang Jeon
        "\uB0A8\uAD81", // Nam Goong
        "\uB3C5\uACE0", // Dok Go
        "\uB3D9\uBC29", // Dong Bang
        "\uB9DD\uC808", // Mang Jeol
        "\uC0AC\uACF5", // Sa Gong
        "\uC11C\uBB38", // Seo Moon
        "\uC120\uC6B0", // Seon Woo
        "\uC18C\uBD09", // So Bong
        "\uC5B4\uAE08", // Uh Geum
        "\uC7A5\uACE1", // Jang Gok
        "\uC81C\uAC08", // Je Gal
        "\uD669\uBCF4"  // Hwang Bo
    };
8356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    public static class Name {
85635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String prefix;
86635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String givenNames;
87635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String middleName;
88635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String familyName;
89635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String suffix;
90635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
91635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public int fullNameStyle;
92635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
93635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String phoneticFamilyName;
94635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String phoneticMiddleName;
95635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public String phoneticGivenName;
96635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
97635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public int phoneticNameStyle;
984097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
99622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        public Name() {
100622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        }
101622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey
102622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        public Name(String prefix, String givenNames, String middleName, String familyName,
103622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey                String suffix) {
104622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.prefix = prefix;
105622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.givenNames = givenNames;
106622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.middleName = middleName;
107622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.familyName = familyName;
108622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            this.suffix = suffix;
109622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        }
110622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey
11110178e5e0b9de566e04508b624a89860c61787d6Makoto Onuki        @NeededForTesting
1124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getPrefix() {
1134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return prefix;
1144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
1154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
1164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getGivenNames() {
1174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return givenNames;
1184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
1194097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
1204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getMiddleName() {
1214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return middleName;
1224097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
1234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
1244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getFamilyName() {
1254097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return familyName;
1264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
1274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
12810178e5e0b9de566e04508b624a89860c61787d6Makoto Onuki        @NeededForTesting
1294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public String getSuffix() {
1304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return suffix;
1314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
132622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey
133ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public int getFullNameStyle() {
134ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return fullNameStyle;
135ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
136ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
137ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public String getPhoneticFamilyName() {
138ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return phoneticFamilyName;
139ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
140ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
141ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public String getPhoneticMiddleName() {
142ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return phoneticMiddleName;
143ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
144ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
145ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public String getPhoneticGivenName() {
146ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return phoneticGivenName;
147ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
148ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
149ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        public int getPhoneticNameStyle() {
150ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov            return phoneticNameStyle;
151ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov        }
152ae7733451f6ddf3246efcd7fd4fc6882eefa6657Dmitri Plotnikov
153622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        public void fromValues(ContentValues values) {
154622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            prefix = values.getAsString(StructuredName.PREFIX);
155622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            givenNames = values.getAsString(StructuredName.GIVEN_NAME);
156622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            middleName = values.getAsString(StructuredName.MIDDLE_NAME);
157622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            familyName = values.getAsString(StructuredName.FAMILY_NAME);
158622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            suffix = values.getAsString(StructuredName.SUFFIX);
159635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
160635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
161635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer;
162635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
163635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
164635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
165635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);
166635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
167635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
168635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer;
169622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        }
170622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey
171622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        public void toValues(ContentValues values) {
172635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.PREFIX, prefix);
173635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
174635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
175635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
176635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.SUFFIX, suffix);
177635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
178635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
179635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
180635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
181635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
182635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
183635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
184635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        private void putValueIfPresent(ContentValues values, String name, String value) {
185635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (value != null) {
186635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                values.put(name, value);
187635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
188635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
189635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
190635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public void clear() {
191635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            prefix = null;
192635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            givenNames = null;
193635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            middleName = null;
194635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            familyName = null;
195635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            suffix = null;
196635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            fullNameStyle = FullNameStyle.UNDEFINED;
197635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticFamilyName = null;
198635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticMiddleName = null;
199635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticGivenName = null;
200635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
201622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        }
2025dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
2035dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        public boolean isEmpty() {
2045dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov            return TextUtils.isEmpty(givenNames)
2055dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(middleName)
2065dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(familyName)
2075dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(suffix)
2085dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(phoneticFamilyName)
2095dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(phoneticMiddleName)
2105dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    && TextUtils.isEmpty(phoneticGivenName);
2115dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        }
2125dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
2135dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        @Override
2145dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        public String toString() {
21517a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov            return "[prefix: " + prefix + " given: " + givenNames + " middle: " + middleName
21617a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov                    + " family: " + familyName + " suffix: " + suffix + " ph/given: "
21717a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov                    + phoneticGivenName + " ph/middle: " + phoneticMiddleName + " ph/family: "
21817a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov                    + phoneticFamilyName + "]";
2195dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        }
2204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
2214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2224097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private static class NameTokenizer extends StringTokenizer {
2234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        private final String[] mTokens;
2244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        private int mDotBitmask;
225635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        private int mCommaBitmask;
2264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        private int mStartPointer;
2274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        private int mEndPointer;
2284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public NameTokenizer(String fullName) {
2304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            super(fullName, " .,", true);
2314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            mTokens = new String[MAX_TOKENS];
2334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            // Iterate over tokens, skipping over empty ones and marking tokens that
2354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            // are followed by dots.
2364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
2374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                final String token = nextToken();
2384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (token.length() > 0) {
2394097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    final char c = token.charAt(0);
240635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    if (c == ' ') {
2414097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                        continue;
2424097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    }
2434097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
2444097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2454097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (mEndPointer > 0 && token.charAt(0) == '.') {
2464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    mDotBitmask |= (1 << (mEndPointer - 1));
247635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                } else if (mEndPointer > 0 && token.charAt(0) == ',') {
248635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    mCommaBitmask |= (1 << (mEndPointer - 1));
2494097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                } else {
2504097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    mTokens[mEndPointer] = token;
2514097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    mEndPointer++;
2524097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
2534097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
2544097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
2554097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        /**
2574097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov         * Returns true if the token is followed by a dot in the original full name.
2584097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov         */
2594097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        public boolean hasDot(int index) {
2604097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return (mDotBitmask & (1 << index)) != 0;
2614097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
262635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
263635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        /**
264635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov         * Returns true if the token is followed by a comma in the original full name.
265635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov         */
266635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        public boolean hasComma(int index) {
267635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return (mCommaBitmask & (1 << index)) != 0;
268635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
2694097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
2704097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2714097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
2724097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Constructor.
2734097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     *
2744097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * @param commonPrefixes comma-separated list of common prefixes,
2754097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     *            e.g. "Mr, Ms, Mrs"
2764097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
277635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *            e.g. "d', st, st., von"
2784097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * @param commonSuffixes comma-separated list of common suffixes,
2794097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     *            e.g. "Jr, M.D., MD, D.D.S."
2804097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * @param commonConjunctions comma-separated list of common conjuctions,
2814097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     *            e.g. "AND, Or"
2824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
2834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
284622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey            String commonSuffixes, String commonConjunctions, Locale locale) {
285622e0a2f00b3de248926ec9e89b11a6425919819Jeff Sharkey        // TODO: refactor this to use <string-array> resources
2864097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mPrefixesSet = convertToSet(commonPrefixes);
2874097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
2884097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mSuffixesSet = convertToSet(commonSuffixes);
2894097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mConjuctions = convertToSet(commonConjunctions);
290635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        mLocale = locale != null ? locale : Locale.getDefault();
291635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        mLanguage = mLocale.getLanguage().toLowerCase();
2924097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
2934097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        int maxLength = 0;
2944097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        for (String suffix : mSuffixesSet) {
2954097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if (suffix.length() > maxLength) {
2964097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                maxLength = suffix.length();
2974097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
2984097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
2994097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
3004097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        mMaxSuffixLength = maxLength;
3014097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
3024097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
3034097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
3044097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Converts a comma-separated list of Strings to a set of Strings. Trims strings
3054097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * and converts them to upper case.
3064097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
3074097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private static HashSet<String> convertToSet(String strings) {
3084097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        HashSet<String> set = new HashSet<String>();
3094097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (strings != null) {
3104097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            String[] split = strings.split(",");
3114097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            for (int i = 0; i < split.length; i++) {
3124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                set.add(split[i].trim().toUpperCase());
3134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
3144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
3154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        return set;
3164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
3174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
3184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
319f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov     * Parses a full name and returns components as a list of tokens.
320f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov     */
321f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov    public int tokenize(String[] tokens, String fullName) {
322f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        if (fullName == null) {
323f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov            return 0;
324f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        }
325f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
326f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        NameTokenizer tokenizer = new NameTokenizer(fullName);
327f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
328f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        if (tokenizer.mStartPointer == tokenizer.mEndPointer) {
329f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov            return 0;
330f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        }
331f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
332f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        String firstToken = tokenizer.mTokens[tokenizer.mStartPointer];
333f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        if (mPrefixesSet.contains(firstToken.toUpperCase())) {
334f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov           tokenizer.mStartPointer++;
335f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        }
336f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        int count = 0;
337f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) {
338f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov            tokens[count++] = tokenizer.mTokens[i];
339f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        }
340f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
341f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov        return count;
342f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov    }
343f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
344f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov
345f23764675b35b5262a39c79aad8e9842460274b2Dmitri Plotnikov    /**
3464097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Parses a full name and returns parsed components in the Name object.
3474097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
3484097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    public void split(Name name, String fullName) {
3494097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (fullName == null) {
3504097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
3514097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
3524097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
353635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int fullNameStyle = guessFullNameStyle(fullName);
354635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (fullNameStyle == FullNameStyle.CJK) {
355635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            fullNameStyle = getAdjustedFullNameStyle(fullNameStyle);
356635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
357635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
35856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        split(name, fullName, fullNameStyle);
35956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    }
36056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
36156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    /**
36256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * Parses a full name and returns parsed components in the Name object
36356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * with a given fullNameStyle.
36456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     */
36556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    public void split(Name name, String fullName, int fullNameStyle) {
36656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        if (fullName == null) {
36756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            return;
36856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        }
36956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
3705dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        name.fullNameStyle = fullNameStyle;
3715dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
372635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        switch (fullNameStyle) {
373635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.CHINESE:
374635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                splitChineseName(name, fullName);
375635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                break;
376635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
377635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.JAPANESE:
37856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                splitJapaneseName(name, fullName);
37956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                break;
38056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
381635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.KOREAN:
38256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                splitKoreanName(name, fullName);
383635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                break;
384635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
385635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            default:
386635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                splitWesternName(name, fullName);
387635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
388635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
389635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
390635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
391635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Splits a full name composed according to the Western tradition:
392635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * <pre>
393635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *   [prefix] given name(s) [[middle name] family name] [, suffix]
394635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *   [prefix] family name, given name [middle name] [,suffix]
395635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * </pre>
396635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
397635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private void splitWesternName(Name name, String fullName) {
3984097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        NameTokenizer tokens = new NameTokenizer(fullName);
3994097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        parsePrefix(name, tokens);
400c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov
401c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov        // If the name consists of just one or two tokens, treat them as first/last name,
402c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov        // not as suffix.  Example: John Ma; Ma is last name, not "M.A.".
403c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov        if (tokens.mEndPointer > 2) {
404c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov            parseSuffix(name, tokens);
405c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov        }
406c19e02a37399c55b852d6570f73553e859b0139aDmitri Plotnikov
407c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov        if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) {
408c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov            name.givenNames = tokens.mTokens[tokens.mStartPointer];
409c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov        } else {
410c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov            parseLastName(name, tokens);
411c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov            parseMiddleName(name, tokens);
412c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov            parseGivenNames(name, tokens);
413c10787a94ab8d0163480b6f33bd4367d142164d4Dmitri Plotnikov        }
4144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
4154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
4164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
417635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Splits a full name composed according to the Chinese tradition:
418635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * <pre>
419635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *   [family name [middle name]] given name
420635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * </pre>
421635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
422635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private void splitChineseName(Name name, String fullName) {
423635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        StringTokenizer tokenizer = new StringTokenizer(fullName);
424635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (tokenizer.hasMoreTokens()) {
425635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            String token = tokenizer.nextToken();
426635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (name.givenNames == null) {
427635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
428635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (name.familyName == null) {
429635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = name.givenNames;
430635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
431635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (name.middleName == null) {
432635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.middleName = name.givenNames;
433635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
434635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
435635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.middleName = name.middleName + name.givenNames;
436635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
437635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
438635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
439635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
440635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // If a single word parse that word up.
441635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (name.givenNames != null && name.familyName == null && name.middleName == null) {
442635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            int length = fullName.length();
443635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (length == 2) {
444635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = fullName.substring(0, 1);
445635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = fullName.substring(1);
446635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (length == 3) {
447635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = fullName.substring(0, 1);
448635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.middleName = fullName.substring(1, 2);
449635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = fullName.substring(2);
450635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (length == 4) {
451635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = fullName.substring(0, 2);
452635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.middleName = fullName.substring(2, 3);
453635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = fullName.substring(3);
454635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
455635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
456635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
457635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
458635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
459635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
460635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Splits a full name composed according to the Japanese tradition:
461635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * <pre>
462635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *   [family name] given name(s)
463635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * </pre>
464635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
46556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    private void splitJapaneseName(Name name, String fullName) {
466635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        StringTokenizer tokenizer = new StringTokenizer(fullName);
467635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (tokenizer.hasMoreTokens()) {
468635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            String token = tokenizer.nextToken();
469635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (name.givenNames == null) {
470635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
471635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (name.familyName == null) {
472635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.familyName = name.givenNames;
473635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames = token;
474635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
475635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.givenNames += " " + token;
476635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
477635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
478635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
479635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
480635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
48156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * Splits a full name composed according to the Korean tradition:
48256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * <pre>
48356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     *   [family name] given name(s)
48456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     * </pre>
48556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee     */
48656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    private void splitKoreanName(Name name, String fullName) {
48756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        StringTokenizer tokenizer = new StringTokenizer(fullName);
48856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        if (tokenizer.countTokens() > 1) {
48956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            // Each name can be identified by separators.
49056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            while (tokenizer.hasMoreTokens()) {
49156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                String token = tokenizer.nextToken();
49256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                if (name.givenNames == null) {
49356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    name.givenNames = token;
49456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                } else if (name.familyName == null) {
49556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    name.familyName = name.givenNames;
49656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    name.givenNames = token;
49756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                } else {
49856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    name.givenNames += " " + token;
49956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                }
50056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            }
50156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        } else {
50256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            // There is no separator. Try to guess family name.
50356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            // The length of most family names is 1.
50456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            int familyNameLength = 1;
50556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
50656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            // Compare with 2-length family names.
50756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            for (String twoLengthFamilyName : KOREAN_TWO_CHARCTER_FAMILY_NAMES) {
50856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                if (fullName.startsWith(twoLengthFamilyName)) {
50956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    familyNameLength = 2;
51056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                    break;
51156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                }
51256f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            }
51356f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
51456f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            name.familyName = fullName.substring(0, familyNameLength);
51556f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            if (fullName.length() > familyNameLength) {
51656f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee                name.givenNames = fullName.substring(familyNameLength);
51756f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee            }
51856f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee        }
51956f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    }
52056f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee
52156f2638b49e6bca97f6aa7b0768a8f1fe6e7b72eSang-il, Lee    /**
522635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Concatenates components of a name according to the rules dictated by the name style.
523635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *
524635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * @param givenNameFirst is ignored for CJK display name styles
525635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
52655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov    public String join(Name name, boolean givenNameFirst, boolean includePrefix) {
52755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        String prefix = includePrefix ? name.prefix : null;
528635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        switch (name.fullNameStyle) {
529cdd03b2ba03718a7fa85663a2438136284a1557cBai Tao            case FullNameStyle.CJK:
530635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.CHINESE:
531635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.KOREAN:
53255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                return join(prefix, name.familyName, name.middleName, name.givenNames,
53355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                        name.suffix, false, false, false);
534635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
535635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            case FullNameStyle.JAPANESE:
53655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                return join(prefix, name.familyName, name.middleName, name.givenNames,
53755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                        name.suffix, true, false, false);
538635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
539635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            default:
540635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (givenNameFirst) {
54155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                    return join(prefix, name.givenNames, name.middleName, name.familyName,
54255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                            name.suffix, true, false, true);
543635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                } else {
54455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                    return join(prefix, name.familyName, name.givenNames, name.middleName,
54555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                            name.suffix, true, true, true);
546635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
547635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
548635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
549635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
550635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
5515dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * Concatenates components of the phonetic name following the CJK tradition:
5525dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * family name + middle name + given name(s).
5535dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     */
5545dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    public String joinPhoneticName(Name name) {
55555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        return join(null, name.phoneticFamilyName,
55655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                name.phoneticMiddleName, name.phoneticGivenName, null, true, false, false);
5575dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    }
5585dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
5595dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    /**
560635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Concatenates parts of a full name inserting spaces and commas as specified.
561635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
56255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov    private String join(String prefix, String part1, String part2, String part3, String suffix,
563635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) {
56455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        prefix = prefix == null ? null: prefix.trim();
565dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov        part1 = part1 == null ? null: part1.trim();
566dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov        part2 = part2 == null ? null: part2.trim();
567dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov        part3 = part3 == null ? null: part3.trim();
568dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov        suffix = suffix == null ? null: suffix.trim();
569dadaeebb6ff4845c5cad1c294fca669e7ac446eeDmitri Plotnikov
57055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        boolean hasPrefix = !TextUtils.isEmpty(prefix);
571635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean hasPart1 = !TextUtils.isEmpty(part1);
572635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean hasPart2 = !TextUtils.isEmpty(part2);
573635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean hasPart3 = !TextUtils.isEmpty(part3);
574635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean hasSuffix = !TextUtils.isEmpty(suffix);
575635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
576635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        boolean isSingleWord = true;
577635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        String singleWord = null;
57855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
57955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (hasPrefix) {
58055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            singleWord = prefix;
58155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
58255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
583635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart1) {
58455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (singleWord != null) {
58555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                isSingleWord = false;
58655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            } else {
58755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                singleWord = part1;
58855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
589635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
590635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
591635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart2) {
592635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (singleWord != null) {
593635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                isSingleWord = false;
594635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
595635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                singleWord = part2;
596635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
597635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
598635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
599635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart3) {
600635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (singleWord != null) {
601635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                isSingleWord = false;
602635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
603635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                singleWord = part3;
604635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
605635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
606635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
607635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasSuffix) {
608635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (singleWord != null) {
609635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                isSingleWord = false;
610635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
611635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                singleWord = normalizedSuffix(suffix);
612635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
613635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
614635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
615635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (isSingleWord) {
616635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return singleWord;
617635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
618635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
619635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        StringBuilder sb = new StringBuilder();
62055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
62155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (hasPrefix) {
62255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            sb.append(prefix);
62355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
62455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
625635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart1) {
62655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (hasPrefix) {
62755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                sb.append(' ');
62855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
629635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            sb.append(part1);
630635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
631635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
632635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart2) {
63355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (hasPrefix || hasPart1) {
634635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useCommaAfterPart1) {
635635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(',');
636635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
637635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useSpace) {
638635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(' ');
639635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
640635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
641635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            sb.append(part2);
642635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
643635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
644635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasPart3) {
64555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (hasPrefix || hasPart1 || hasPart2) {
646635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useSpace) {
647635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(' ');
648635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
649635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
650635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            sb.append(part3);
651635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
652635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
653635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (hasSuffix) {
65455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (hasPrefix || hasPart1 || hasPart2 || hasPart3) {
655635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useCommaAfterPart3) {
656635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(',');
657635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
658635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (useSpace) {
659635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    sb.append(' ');
660635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
661635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
662635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            sb.append(normalizedSuffix(suffix));
663635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
664635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
665635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return sb.toString();
666635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
667635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
668635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
669635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Puts a dot after the supplied suffix if that is the accepted form of the suffix,
670635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * e.g. "Jr." and "Sr.", but not "I", "II" and "III".
671635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
672635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private String normalizedSuffix(String suffix) {
673635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int length = suffix.length();
674635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (length == 0 || suffix.charAt(length - 1) == '.') {
675635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return suffix;
676635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
677635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
678635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        String withDot = suffix + '.';
679635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (mSuffixesSet.contains(withDot.toUpperCase())) {
680635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return withDot;
681635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        } else {
682635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return suffix;
683635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
684635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
685635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
686635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
687635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * If the supplied name style is undefined, returns a default based on the language,
688635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * otherwise returns the supplied name style itself.
689635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     *
690635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * @param nameStyle See {@link FullNameStyle}.
691635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
692635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    public int getAdjustedFullNameStyle(int nameStyle) {
693635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (nameStyle == FullNameStyle.UNDEFINED) {
694635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (JAPANESE_LANGUAGE.equals(mLanguage)) {
695635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.JAPANESE;
696635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
697635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.KOREAN;
698635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (CHINESE_LANGUAGE.equals(mLanguage)) {
699635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.CHINESE;
700635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
701635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.WESTERN;
702635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
703635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        } else if (nameStyle == FullNameStyle.CJK) {
704635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (JAPANESE_LANGUAGE.equals(mLanguage)) {
705635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.JAPANESE;
706635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
707635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.KOREAN;
708635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            } else {
709635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return FullNameStyle.CHINESE;
710635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
711635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
712635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return nameStyle;
713635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
714635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
715635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
7164097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Parses the first word from the name if it is a prefix.
7174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
7184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parsePrefix(Name name, NameTokenizer tokens) {
7194097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
7204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7214097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7224097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        String firstToken = tokens.mTokens[tokens.mStartPointer];
7244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (mPrefixesSet.contains(firstToken.toUpperCase())) {
72517a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov            if (tokens.hasDot(tokens.mStartPointer)) {
72617a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov                firstToken += '.';
72717a22fae02931ae536f35293ca13a8de53439f72Dmitri Plotnikov            }
7284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            name.prefix = firstToken;
7294097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            tokens.mStartPointer++;
7304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
7324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    /**
7344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     * Parses the last word(s) from the name if it is a suffix.
7354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov     */
7364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parseSuffix(Name name, NameTokenizer tokens) {
7374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
7384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7394097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7404097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7414097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        String lastToken = tokens.mTokens[tokens.mEndPointer - 1];
74255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
74355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        // Take care of an explicit comma-separated suffix
74455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (tokens.mEndPointer - tokens.mStartPointer > 2
74555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                && tokens.hasComma(tokens.mEndPointer - 2)) {
74655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (tokens.hasDot(tokens.mEndPointer - 1)) {
74755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                lastToken += '.';
74855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
74955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            name.suffix = lastToken;
75055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            tokens.mEndPointer--;
75155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            return;
75255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
75355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
7544097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (lastToken.length() > mMaxSuffixLength) {
7554097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7574097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7584097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        String normalized = lastToken.toUpperCase();
7594097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (mSuffixesSet.contains(normalized)) {
7604097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            name.suffix = lastToken;
7614097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            tokens.mEndPointer--;
7624097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7634097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7644097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7654097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.hasDot(tokens.mEndPointer - 1)) {
7664097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            lastToken += '.';
7674097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7684097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        normalized += ".";
7694097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7704097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        // Take care of suffixes like M.D. and D.D.S.
7714097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        int pos = tokens.mEndPointer - 1;
7724097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        while (normalized.length() <= mMaxSuffixLength) {
7734097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7744097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if (mSuffixesSet.contains(normalized)) {
7754097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                name.suffix = lastToken;
7764097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                tokens.mEndPointer = pos;
7774097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                return;
7784097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
7794097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7804097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if (pos == tokens.mStartPointer) {
7814097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                break;
7824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
7834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            pos--;
7854097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if (tokens.hasDot(pos)) {
7864097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                lastToken = tokens.mTokens[pos] + "." + lastToken;
7874097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            } else {
7884097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                lastToken = tokens.mTokens[pos] + " " + lastToken;
7894097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
7904097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7914097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized;
7924097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7934097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
7944097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
7954097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parseLastName(Name name, NameTokenizer tokens) {
7964097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
7974097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
7984097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
7994097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
800635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // If the first word is followed by a comma, assume that it's the family name
801635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (tokens.hasComma(tokens.mStartPointer)) {
802635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov           name.familyName = tokens.mTokens[tokens.mStartPointer];
803635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov           tokens.mStartPointer++;
804635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov           return;
805635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
806635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
807635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // If the second word is followed by a comma and the first word
808635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // is a last name prefix as in "de Sade" and "von Cliburn", treat
809635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // the first two words as the family name.
810635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (tokens.mStartPointer + 1 < tokens.mEndPointer
811635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                && tokens.hasComma(tokens.mStartPointer + 1)
812635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) {
813635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            String familyNamePrefix = tokens.mTokens[tokens.mStartPointer];
814635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (tokens.hasDot(tokens.mStartPointer)) {
815635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                familyNamePrefix += '.';
816635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
817635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1];
818635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            tokens.mStartPointer += 2;
819635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return;
820635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
821635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
822635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // Finally, assume that the last word is the last name
8234097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        name.familyName = tokens.mTokens[tokens.mEndPointer - 1];
8244097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        tokens.mEndPointer--;
8254097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
826635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        // Take care of last names like "de Sade" and "von Cliburn"
8274097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if ((tokens.mEndPointer - tokens.mStartPointer) > 0) {
8284097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1];
829635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (isFamilyNamePrefix(lastNamePrefix)) {
8304097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (tokens.hasDot(tokens.mEndPointer - 1)) {
8314097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    lastNamePrefix += '.';
8324097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
8334097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                name.familyName = lastNamePrefix + " " + name.familyName;
8344097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                tokens.mEndPointer--;
8354097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
8364097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8374097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
8384097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
839635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
840635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de"
841635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
842635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private boolean isFamilyNamePrefix(String word) {
843635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        final String normalized = word.toUpperCase();
844635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
845635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return mLastNamePrefixesSet.contains(normalized)
846635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || mLastNamePrefixesSet.contains(normalized + ".");
847635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
848635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
8494097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
8504097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parseMiddleName(Name name, NameTokenizer tokens) {
8514097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
8524097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
8534097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8544097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
8554097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if ((tokens.mEndPointer - tokens.mStartPointer) > 1) {
8564097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            if ((tokens.mEndPointer - tokens.mStartPointer) == 2
8574097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2].
8584097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                            toUpperCase())) {
8594097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                name.middleName = tokens.mTokens[tokens.mEndPointer - 1];
860635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (tokens.hasDot(tokens.mEndPointer - 1)) {
861635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    name.middleName += '.';
862635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
8634097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                tokens.mEndPointer--;
8644097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
8654097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8664097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
8674097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
8684097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    private void parseGivenNames(Name name, NameTokenizer tokens) {
8694097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if (tokens.mStartPointer == tokens.mEndPointer) {
8704097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            return;
8714097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8724097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
8734097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        if ((tokens.mEndPointer - tokens.mStartPointer) == 1) {
8744097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            name.givenNames = tokens.mTokens[tokens.mStartPointer];
8754097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        } else {
8764097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            StringBuilder sb = new StringBuilder();
8774097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) {
8784097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (i != tokens.mStartPointer) {
8794097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    sb.append(' ');
8804097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
8814097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                sb.append(tokens.mTokens[i]);
8824097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                if (tokens.hasDot(i)) {
8834097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                    sb.append('.');
8844097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov                }
8854097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            }
8864097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov            name.givenNames = sb.toString();
8874097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov        }
8884097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov    }
889635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
890635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
891635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Makes the best guess at the expected full name style based on the character set
892635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * used in the supplied name.  If the phonetic name is also supplied, tries to
893635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * differentiate between Chinese, Japanese and Korean based on the alphabet used
894635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * for the phonetic name.
895635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
896635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    public void guessNameStyle(Name name) {
897635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        guessFullNameStyle(name);
898635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        guessPhoneticNameStyle(name);
8995dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        name.fullNameStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(name.fullNameStyle,
9005dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                name.phoneticNameStyle);
9015dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    }
902635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
9035dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    /**
9045dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * Updates the display name style according to the phonetic name style if we
9055dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * were unsure about display name style based on the name components, but
9065dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     * phonetic name makes it more definitive.
9075dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov     */
9085dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) {
9095dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        if (phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
9105dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov            if (nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK) {
9115dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) {
9125dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    return FullNameStyle.JAPANESE;
9135dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                } else if (phoneticNameStyle == PhoneticNameStyle.KOREAN) {
9145dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    return FullNameStyle.KOREAN;
915635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
9165dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                if (nameStyle == FullNameStyle.CJK && phoneticNameStyle == PhoneticNameStyle.PINYIN) {
9175dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov                    return FullNameStyle.CHINESE;
918635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
919635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
920635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
9215dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        return nameStyle;
922635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
923635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
924635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    /**
925635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * Makes the best guess at the expected full name style based on the character set
926635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     * used in the supplied name.
927635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov     */
928635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private void guessFullNameStyle(NameSplitter.Name name) {
9295dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        if (name.fullNameStyle != FullNameStyle.UNDEFINED) {
9305dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov            return;
9315dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        }
9325dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
933635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int bestGuess = guessFullNameStyle(name.givenNames);
9344cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao        // A mix of Hanzi and latin chars are common in China, so we have to go through all names
9354cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao        // if the name is not JANPANESE or KOREAN.
9364cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao        if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK
9374cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                && bestGuess != FullNameStyle.WESTERN) {
938635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            name.fullNameStyle = bestGuess;
939635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return;
940635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
941635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
942635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int guess = guessFullNameStyle(name.familyName);
943635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
9444cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
945635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.fullNameStyle = guess;
946635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return;
947635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
948635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            bestGuess = guess;
949635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
950635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
951635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        guess = guessFullNameStyle(name.middleName);
952635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
9534cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
954635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.fullNameStyle = guess;
955635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return;
956635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
957635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            bestGuess = guess;
958635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
959635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
96055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        guess = guessFullNameStyle(name.prefix);
96155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
96255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
96355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                name.fullNameStyle = guess;
96455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                return;
96555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
96655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            bestGuess = guess;
96755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
96855e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
96955e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        guess = guessFullNameStyle(name.suffix);
97055e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
97155e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
97255e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                name.fullNameStyle = guess;
97355e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov                return;
97455e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            }
97555e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov            bestGuess = guess;
97655e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov        }
97755e5cbf566edd89fc55f4a7f0ef2847084da9b16Dmitri Plotnikov
978635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        name.fullNameStyle = bestGuess;
979635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
980635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
9815dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    public int guessFullNameStyle(String name) {
982635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (name == null) {
983635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return FullNameStyle.UNDEFINED;
984635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
985635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
986635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int nameStyle = FullNameStyle.UNDEFINED;
987635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int length = name.length();
988635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int offset = 0;
989635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (offset < length) {
990635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            int codePoint = Character.codePointAt(name, offset);
991635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (Character.isLetter(codePoint)) {
992635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
993635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
9944cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                if (!isLatinUnicodeBlock(unicodeBlock)) {
995635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
9964cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    if (isCJKUnicodeBlock(unicodeBlock)) {
9974cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        // We don't know if this is Chinese, Japanese or Korean -
9984cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        // trying to figure out by looking at other characters in the name
9994cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        return guessCJKNameStyle(name, offset + Character.charCount(codePoint));
10004cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    }
1001635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
10024cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
10034cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        return FullNameStyle.JAPANESE;
10044cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    }
1005635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
10064cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    if (isKoreanUnicodeBlock(unicodeBlock)) {
10074cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                        return FullNameStyle.KOREAN;
10084cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                    }
1009635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
10104cd13c4266d8e476e1a49c4b6bcd5b18c33d0de3Bai Tao                nameStyle = FullNameStyle.WESTERN;
1011635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1012635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            offset += Character.charCount(codePoint);
1013635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1014635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return nameStyle;
1015635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1016635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1017635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private int guessCJKNameStyle(String name, int offset) {
1018635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int length = name.length();
1019635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (offset < length) {
1020635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            int codePoint = Character.codePointAt(name, offset);
1021635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (Character.isLetter(codePoint)) {
1022635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
1023635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
1024635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return FullNameStyle.JAPANESE;
1025635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1026635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isKoreanUnicodeBlock(unicodeBlock)) {
1027635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return FullNameStyle.KOREAN;
1028635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1029635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1030635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            offset += Character.charCount(codePoint);
1031635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1032635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1033635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return FullNameStyle.CJK;
1034635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1035635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1036635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private void guessPhoneticNameStyle(NameSplitter.Name name) {
10375dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
10385dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov            return;
10395dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov        }
10405dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov
1041635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName);
1042635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) {
1043635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            name.phoneticNameStyle = bestGuess;
1044635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return;
1045635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1046635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1047635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int guess = guessPhoneticNameStyle(name.phoneticGivenName);
1048635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
1049635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (guess != FullNameStyle.CJK) {
1050635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.phoneticNameStyle = guess;
1051635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return;
1052635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1053635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            bestGuess = guess;
1054635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1055635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1056635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        guess = guessPhoneticNameStyle(name.phoneticMiddleName);
1057635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (guess != FullNameStyle.UNDEFINED) {
1058635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (guess != FullNameStyle.CJK) {
1059635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                name.phoneticNameStyle = guess;
1060635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                return;
1061635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1062635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            bestGuess = guess;
1063635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1064635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1065635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
10665dd6d5d4acb93adc05f1fde904080787f2397f51Dmitri Plotnikov    public int guessPhoneticNameStyle(String name) {
1067635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        if (name == null) {
1068635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            return PhoneticNameStyle.UNDEFINED;
1069635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1070635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1071635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int nameStyle = PhoneticNameStyle.UNDEFINED;
1072635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int length = name.length();
1073635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        int offset = 0;
1074635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        while (offset < length) {
1075635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            int codePoint = Character.codePointAt(name, offset);
1076635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            if (Character.isLetter(codePoint)) {
1077635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
1078635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
1079635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return PhoneticNameStyle.JAPANESE;
1080635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1081635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isKoreanUnicodeBlock(unicodeBlock)) {
1082635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return PhoneticNameStyle.KOREAN;
1083635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1084635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                if (isLatinUnicodeBlock(unicodeBlock)) {
1085635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                    return PhoneticNameStyle.PINYIN;
1086635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                }
1087635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            }
1088635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov            offset += Character.charCount(codePoint);
1089635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        }
1090635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1091635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return nameStyle;
1092635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1093635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1094635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
1095635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return unicodeBlock == UnicodeBlock.BASIC_LATIN ||
1096635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT ||
1097635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A ||
1098635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B ||
1099635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
1100635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1101635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1102635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static boolean isCJKUnicodeBlock(UnicodeBlock block) {
1103635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
1104635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1105635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1106635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
1107635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT
1108635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_COMPATIBILITY
1109635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
1110635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
1111635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
1112635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1113635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1114635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) {
1115635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES ||
1116635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.HANGUL_JAMO ||
1117635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
1118635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
1119635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov
1120635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) {
1121635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov        return unicodeBlock == UnicodeBlock.KATAKANA ||
1122635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS ||
1123635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS ||
1124635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov                unicodeBlock == UnicodeBlock.HIRAGANA;
1125635a11c53a532d9b5aba5fd7c51a8d47dcb0aaf2Dmitri Plotnikov    }
11264097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov}
1127