NameSplitter.java revision c19e02a37399c55b852d6570f73553e859b0139a
14097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov/*
24097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Copyright (C) 2009 The Android Open Source Project
34097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov *
44097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License");
54097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * you may not use this file except in compliance with the License.
64097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * You may obtain a copy of the License at
74097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov *
84097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov *      http://www.apache.org/licenses/LICENSE-2.0
94097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov *
104097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * Unless required by applicable law or agreed to in writing, software
114097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS,
124097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
134097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * See the License for the specific language governing permissions and
144097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov * limitations under the License
154097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov */
1628f8857b1b46bde18b85c6d3c2a63ac44c3c2e1cEvan Millarpackage com.android.providers.contacts;
174097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
184097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovimport java.util.HashSet;
194097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikovimport java.util.StringTokenizer;
204097855e2d672b3f8e1c5c8a169efb80203bf53eDmitri Plotnikov
/**
 * Splits a full name into a prefix, given names, a middle name, a family name
 * and a suffix. The logic only supports having a single last name. If the full
 * name has multiple last names the output will be incorrect.
 * <p>
 * Core algorithm, in the order it is applied:
 * <ol>
 * <li>Remove the prefix (Mr., Pastor, Reverend, Sir) if the first word is one.</li>
 * <li>Remove the suffix (III, Ph.D., M.D.), but only when the full name has
 * more than two words.</li>
 * <li>Assign the last remaining token as the last name.</li>
 * <li>If the previous word to the last name is one from the last name prefixes,
 * use this word also as the last name.</li>
 * <li>Assign the last remaining word as the middle name, unless it is joined to
 * the preceding words by a conjunction.</li>
 * <li>Assign the rest of the words as the "given names".</li>
 * </ol>
 * Words that were followed by a dot in the full name keep that dot in the
 * prefix, given names, middle name, last name prefix and suffix. A dot after
 * the last name itself is not reproduced.
 */
public class NameSplitter {

    private final HashSet<String> mPrefixesSet;
    private final HashSet<String> mSuffixesSet;
    private final int mMaxSuffixLength;
    private final HashSet<String> mLastNamePrefixesSet;
    private final HashSet<String> mConjunctions;

    /**
     * Holder for the components produced by {@link NameSplitter#split}. Every
     * component is null until a split assigns it.
     */
    public static class Name {
        private String prefix;
        private String givenNames;
        private String middleName;
        private String familyName;
        private String suffix;

        public String getPrefix() {
            return prefix;
        }

        public String getGivenNames() {
            return givenNames;
        }

        public String getMiddleName() {
            return middleName;
        }

        public String getFamilyName() {
            return familyName;
        }

        public String getSuffix() {
            return suffix;
        }
    }

    /**
     * Breaks a full name into at most {@link #MAX_TOKENS} words. Spaces and
     * commas only separate words; a dot additionally marks the word before it,
     * so that the dot can be restored when the word is emitted.
     */
    private static class NameTokenizer extends StringTokenizer {
        private static final int MAX_TOKENS = 10;
        private final String[] mTokens;
        // Bit i is set when mTokens[i] was followed by a dot.
        private int mDotBitmask;
        // Half-open window [mStartPointer, mEndPointer) of tokens not yet consumed.
        private int mStartPointer;
        private int mEndPointer;

        public NameTokenizer(String fullName) {
            // Delimiters are returned as tokens so that dots can be detected.
            super(fullName, " .,", true);

            mTokens = new String[MAX_TOKENS];

            // Iterate over tokens, skipping over separators and marking tokens that
            // are followed by dots.
            while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
                final String token = nextToken();
                if (token.length() > 0) {
                    final char c = token.charAt(0);
                    if (c == ' ' || c == ',') {
                        continue;
                    }
                }

                if (mEndPointer > 0 && token.charAt(0) == '.') {
                    mDotBitmask |= (1 << (mEndPointer - 1));
                } else {
                    // Note: a dot that precedes the first word is stored as a word.
                    mTokens[mEndPointer] = token;
                    mEndPointer++;
                }
            }
        }

        /**
         * Returns true if the token is followed by a dot in the original full name.
         */
        public boolean hasDot(int index) {
            return (mDotBitmask & (1 << index)) != 0;
        }
    }

    /**
     * Constructor.
     *
     * @param commonPrefixes comma-separated list of common prefixes,
     *            e.g. "Mr, Ms, Mrs"
     * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
     *            e.g. "d', st, st., von"
     * @param commonSuffixes comma-separated list of common suffixes,
     *            e.g. "Jr, M.D., MD, D.D.S."
     * @param commonConjunctions comma-separated list of common conjunctions,
     *            e.g. "AND, Or"
     */
    public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
            String commonSuffixes, String commonConjunctions) {
        mPrefixesSet = convertToSet(commonPrefixes);
        mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
        mSuffixesSet = convertToSet(commonSuffixes);
        mConjunctions = convertToSet(commonConjunctions);

        // The longest known suffix bounds how far parseSuffix has to look.
        int maxLength = 0;
        for (String suffix : mSuffixesSet) {
            if (suffix.length() > maxLength) {
                maxLength = suffix.length();
            }
        }

        mMaxSuffixLength = maxLength;
    }

    /**
     * Converts a comma-separated list of Strings to a set of Strings. Trims strings
     * and converts them to upper case. A null list yields an empty set.
     */
    private static HashSet<String> convertToSet(String strings) {
        HashSet<String> set = new HashSet<String>();
        if (strings != null) {
            for (String item : strings.split(",")) {
                set.add(item.trim().toUpperCase());
            }
        }
        return set;
    }

    /**
     * Parses a full name and stores the parsed components in the Name object.
     * Only the components that are found are assigned; the others are left as
     * they were. A null full name leaves the Name object untouched.
     *
     * @param name receives the parsed components
     * @param fullName the full name to parse, may be null
     */
    public void split(Name name, String fullName) {
        if (fullName == null) {
            return;
        }

        NameTokenizer tokens = new NameTokenizer(fullName);
        parsePrefix(name, tokens);

        // If the name consists of just one or two tokens, treat them as first/last name,
        // not as suffix.  Example: John Ma; Ma is last name, not "M.A.".
        if (tokens.mEndPointer > 2) {
            parseSuffix(name, tokens);
        }

        parseLastName(name, tokens);
        parseMiddleName(name, tokens);
        parseGivenNames(name, tokens);
    }

    /**
     * Parses the first word from the name if it is a prefix. The dot that
     * followed the prefix in the full name, if any, is kept.
     */
    private void parsePrefix(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        String firstToken = tokens.mTokens[tokens.mStartPointer];
        if (mPrefixesSet.contains(firstToken.toUpperCase())) {
            if (tokens.hasDot(tokens.mStartPointer)) {
                firstToken += '.';
            }
            name.prefix = firstToken;
            tokens.mStartPointer++;
        }
    }

    /**
     * Parses the last word(s) from the name if it is a suffix. Dots that were
     * present in the full name are kept in the resulting suffix.
     */
    private void parseSuffix(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        final int lastIndex = tokens.mEndPointer - 1;
        String lastToken = tokens.mTokens[lastIndex];
        if (lastToken.length() > mMaxSuffixLength) {
            return;
        }

        // The lookup key is built from the bare word; the display form gets
        // its dot back if the full name had one.
        String normalized = lastToken.toUpperCase();
        if (tokens.hasDot(lastIndex)) {
            lastToken += '.';
        }

        if (mSuffixesSet.contains(normalized)) {
            name.suffix = lastToken;
            tokens.mEndPointer--;
            return;
        }

        normalized += ".";

        // Take care of suffixes like M.D. and D.D.S.: keep prepending the
        // preceding word, joined by dots, while the candidate can still fit.
        int pos = lastIndex;
        while (normalized.length() <= mMaxSuffixLength) {

            if (mSuffixesSet.contains(normalized)) {
                name.suffix = lastToken;
                tokens.mEndPointer = pos;
                return;
            }

            if (pos == tokens.mStartPointer) {
                break;
            }

            pos--;
            if (tokens.hasDot(pos)) {
                lastToken = tokens.mTokens[pos] + "." + lastToken;
            } else {
                lastToken = tokens.mTokens[pos] + " " + lastToken;
            }

            normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized;
        }
    }

    /**
     * Takes the last remaining word as the family name, together with the
     * word before it when that word is a known last name prefix.
     */
    private void parseLastName(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        name.familyName = tokens.mTokens[tokens.mEndPointer - 1];
        tokens.mEndPointer--;

        // Take care of last names like "D'Onofrio" and "von Cliburn"
        if ((tokens.mEndPointer - tokens.mStartPointer) > 0) {
            String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1];
            final String normalized = lastNamePrefix.toUpperCase();
            if (mLastNamePrefixesSet.contains(normalized)
                    || mLastNamePrefixesSet.contains(normalized + ".")) {
                if (tokens.hasDot(tokens.mEndPointer - 1)) {
                    lastNamePrefix += '.';
                }
                name.familyName = lastNamePrefix + " " + name.familyName;
                tokens.mEndPointer--;
            }
        }
    }

    /**
     * Takes the last remaining word as the middle name when at least two words
     * remain, unless the word before it is a conjunction (as in "John and
     * Jane"). With exactly two words left the conjunction check is skipped.
     * The dot that followed the middle name in the full name, if any, is kept.
     */
    private void parseMiddleName(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        final int remaining = tokens.mEndPointer - tokens.mStartPointer;
        if (remaining > 1) {
            if (remaining == 2
                    || !mConjunctions.contains(tokens.mTokens[tokens.mEndPointer - 2].
                            toUpperCase())) {
                String middleName = tokens.mTokens[tokens.mEndPointer - 1];
                if (tokens.hasDot(tokens.mEndPointer - 1)) {
                    middleName += '.';
                }
                name.middleName = middleName;
                tokens.mEndPointer--;
            }
        }
    }

    /**
     * Joins all remaining words with single spaces into the given names,
     * restoring the dot after each word that had one in the full name.
     */
    private void parseGivenNames(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        StringBuilder sb = new StringBuilder();
        for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) {
            if (i != tokens.mStartPointer) {
                sb.append(' ');
            }
            sb.append(tokens.mTokens[i]);
            if (tokens.hasDot(i)) {
                sb.append('.');
            }
        }
        name.givenNames = sb.toString();
    }
}
304