NameSplitter.java revision cdd03b2ba03718a7fa85663a2438136284a1557c
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16package com.android.providers.contacts;
17
18import com.android.internal.util.HanziToPinyin;
19import com.android.internal.util.HanziToPinyin.Token;
20
21import android.content.ContentValues;
22import android.provider.ContactsContract.FullNameStyle;
23import android.provider.ContactsContract.PhoneticNameStyle;
24import android.provider.ContactsContract.CommonDataKinds.StructuredName;
25import android.text.TextUtils;
26
27import java.lang.Character.UnicodeBlock;
28import java.util.ArrayList;
29import java.util.HashSet;
30import java.util.Locale;
31import java.util.StringTokenizer;
32
33/**
34 * The purpose of this class is to split a full name into given names and last
35 * name. The logic only supports having a single last name. If the full name has
36 * multiple last names the output will be incorrect.
37 * <p>
38 * Core algorithm:
39 * <ol>
40 * <li>Remove the suffixes (III, Ph.D., M.D.).</li>
41 * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li>
42 * <li>Assign the last remaining token as the last name.</li>
 * <li>If the word immediately before the last name is a known last-name prefix
 * (e.g. "von", "de"), make that word part of the last name as well.</li>
45 * <li>Assign the rest of the words as the "given names".</li>
46 * </ol>
47 */
48public class NameSplitter {
49
50    public static final int MAX_TOKENS = 10;
51
52    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
53    private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase();
54
55    // This includes simplified and traditional Chinese
56    private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();
57
58    private final HashSet<String> mPrefixesSet;
59    private final HashSet<String> mSuffixesSet;
60    private final int mMaxSuffixLength;
61    private final HashSet<String> mLastNamePrefixesSet;
62    private final HashSet<String> mConjuctions;
63    private final Locale mLocale;
64    private final String mLanguage;
65
66    public static class Name {
67        public String prefix;
68        public String givenNames;
69        public String middleName;
70        public String familyName;
71        public String suffix;
72
73        public int fullNameStyle;
74
75        public String phoneticFamilyName;
76        public String phoneticMiddleName;
77        public String phoneticGivenName;
78
79        public int phoneticNameStyle;
80
81        public Name() {
82        }
83
84        public Name(String prefix, String givenNames, String middleName, String familyName,
85                String suffix) {
86            this.prefix = prefix;
87            this.givenNames = givenNames;
88            this.middleName = middleName;
89            this.familyName = familyName;
90            this.suffix = suffix;
91        }
92
93        public String getPrefix() {
94            return prefix;
95        }
96
97        public String getGivenNames() {
98            return givenNames;
99        }
100
101        public String getMiddleName() {
102            return middleName;
103        }
104
105        public String getFamilyName() {
106            return familyName;
107        }
108
109        public String getSuffix() {
110            return suffix;
111        }
112
113        public int getFullNameStyle() {
114            return fullNameStyle;
115        }
116
117        public String getPhoneticFamilyName() {
118            return phoneticFamilyName;
119        }
120
121        public String getPhoneticMiddleName() {
122            return phoneticMiddleName;
123        }
124
125        public String getPhoneticGivenName() {
126            return phoneticGivenName;
127        }
128
129        public int getPhoneticNameStyle() {
130            return phoneticNameStyle;
131        }
132
133        public void fromValues(ContentValues values) {
134            prefix = values.getAsString(StructuredName.PREFIX);
135            givenNames = values.getAsString(StructuredName.GIVEN_NAME);
136            middleName = values.getAsString(StructuredName.MIDDLE_NAME);
137            familyName = values.getAsString(StructuredName.FAMILY_NAME);
138            suffix = values.getAsString(StructuredName.SUFFIX);
139
140            Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
141            fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer;
142
143            phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
144            phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
145            phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);
146
147            integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
148            phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer;
149        }
150
151        public void toValues(ContentValues values) {
152            putValueIfPresent(values, StructuredName.PREFIX, prefix);
153            putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
154            putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
155            putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
156            putValueIfPresent(values, StructuredName.SUFFIX, suffix);
157            values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
158            putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
159            putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
160            putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
161            values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
162        }
163
164        private void putValueIfPresent(ContentValues values, String name, String value) {
165            if (value != null) {
166                values.put(name, value);
167            }
168        }
169
170        public void clear() {
171            prefix = null;
172            givenNames = null;
173            middleName = null;
174            familyName = null;
175            suffix = null;
176            fullNameStyle = FullNameStyle.UNDEFINED;
177            phoneticFamilyName = null;
178            phoneticMiddleName = null;
179            phoneticGivenName = null;
180            phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
181        }
182
183        public boolean isEmpty() {
184            return TextUtils.isEmpty(givenNames)
185                    && TextUtils.isEmpty(middleName)
186                    && TextUtils.isEmpty(familyName)
187                    && TextUtils.isEmpty(suffix)
188                    && TextUtils.isEmpty(phoneticFamilyName)
189                    && TextUtils.isEmpty(phoneticMiddleName)
190                    && TextUtils.isEmpty(phoneticGivenName);
191        }
192
193        @Override
194        public String toString() {
195            return "[given: " + givenNames + " middle: " + middleName + " family: " + familyName
196                    + " ph/given: " + phoneticGivenName + " ph/middle: " + phoneticMiddleName
197                    + " ph/family: " + phoneticFamilyName + "]";
198        }
199
200    }
201
202    private static class NameTokenizer extends StringTokenizer {
203        private final String[] mTokens;
204        private int mDotBitmask;
205        private int mCommaBitmask;
206        private int mStartPointer;
207        private int mEndPointer;
208
209        public NameTokenizer(String fullName) {
210            super(fullName, " .,", true);
211
212            mTokens = new String[MAX_TOKENS];
213
214            // Iterate over tokens, skipping over empty ones and marking tokens that
215            // are followed by dots.
216            while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
217                final String token = nextToken();
218                if (token.length() > 0) {
219                    final char c = token.charAt(0);
220                    if (c == ' ') {
221                        continue;
222                    }
223                }
224
225                if (mEndPointer > 0 && token.charAt(0) == '.') {
226                    mDotBitmask |= (1 << (mEndPointer - 1));
227                } else if (mEndPointer > 0 && token.charAt(0) == ',') {
228                    mCommaBitmask |= (1 << (mEndPointer - 1));
229                } else {
230                    mTokens[mEndPointer] = token;
231                    mEndPointer++;
232                }
233            }
234        }
235
236        /**
237         * Returns true if the token is followed by a dot in the original full name.
238         */
239        public boolean hasDot(int index) {
240            return (mDotBitmask & (1 << index)) != 0;
241        }
242
243        /**
244         * Returns true if the token is followed by a comma in the original full name.
245         */
246        public boolean hasComma(int index) {
247            return (mCommaBitmask & (1 << index)) != 0;
248        }
249    }
250
251    /**
252     * Constructor.
253     *
254     * @param commonPrefixes comma-separated list of common prefixes,
255     *            e.g. "Mr, Ms, Mrs"
256     * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
257     *            e.g. "d', st, st., von"
258     * @param commonSuffixes comma-separated list of common suffixes,
259     *            e.g. "Jr, M.D., MD, D.D.S."
260     * @param commonConjunctions comma-separated list of common conjuctions,
261     *            e.g. "AND, Or"
262     */
263    public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
264            String commonSuffixes, String commonConjunctions, Locale locale) {
265        // TODO: refactor this to use <string-array> resources
266        mPrefixesSet = convertToSet(commonPrefixes);
267        mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
268        mSuffixesSet = convertToSet(commonSuffixes);
269        mConjuctions = convertToSet(commonConjunctions);
270        mLocale = locale != null ? locale : Locale.getDefault();
271        mLanguage = mLocale.getLanguage().toLowerCase();
272
273        int maxLength = 0;
274        for (String suffix : mSuffixesSet) {
275            if (suffix.length() > maxLength) {
276                maxLength = suffix.length();
277            }
278        }
279
280        mMaxSuffixLength = maxLength;
281    }
282
283    /**
284     * Converts a comma-separated list of Strings to a set of Strings. Trims strings
285     * and converts them to upper case.
286     */
287    private static HashSet<String> convertToSet(String strings) {
288        HashSet<String> set = new HashSet<String>();
289        if (strings != null) {
290            String[] split = strings.split(",");
291            for (int i = 0; i < split.length; i++) {
292                set.add(split[i].trim().toUpperCase());
293            }
294        }
295        return set;
296    }
297
298    /**
299     * Parses a full name and returns components as a list of tokens.
300     */
301    public int tokenize(String[] tokens, String fullName) {
302        if (fullName == null) {
303            return 0;
304        }
305
306        NameTokenizer tokenizer = new NameTokenizer(fullName);
307
308        if (tokenizer.mStartPointer == tokenizer.mEndPointer) {
309            return 0;
310        }
311
312        String firstToken = tokenizer.mTokens[tokenizer.mStartPointer];
313        if (mPrefixesSet.contains(firstToken.toUpperCase())) {
314           tokenizer.mStartPointer++;
315        }
316        int count = 0;
317        for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) {
318            tokens[count++] = tokenizer.mTokens[i];
319        }
320
321        return count;
322    }
323
324
325    /**
326     * Parses a full name and returns parsed components in the Name object.
327     */
328    public void split(Name name, String fullName) {
329        if (fullName == null) {
330            return;
331        }
332
333        int fullNameStyle = guessFullNameStyle(fullName);
334        if (fullNameStyle == FullNameStyle.CJK) {
335            fullNameStyle = getAdjustedFullNameStyle(fullNameStyle);
336        }
337
338        name.fullNameStyle = fullNameStyle;
339
340        switch (fullNameStyle) {
341            case FullNameStyle.CHINESE:
342                splitChineseName(name, fullName);
343                break;
344
345            case FullNameStyle.JAPANESE:
346            case FullNameStyle.KOREAN:
347                splitJapaneseOrKoreanName(name, fullName);
348                break;
349
350            default:
351                splitWesternName(name, fullName);
352        }
353    }
354
355    /**
356     * Splits a full name composed according to the Western tradition:
357     * <pre>
358     *   [prefix] given name(s) [[middle name] family name] [, suffix]
359     *   [prefix] family name, given name [middle name] [,suffix]
360     * </pre>
361     */
362    private void splitWesternName(Name name, String fullName) {
363        NameTokenizer tokens = new NameTokenizer(fullName);
364        parsePrefix(name, tokens);
365
366        // If the name consists of just one or two tokens, treat them as first/last name,
367        // not as suffix.  Example: John Ma; Ma is last name, not "M.A.".
368        if (tokens.mEndPointer > 2) {
369            parseSuffix(name, tokens);
370        }
371
372        if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) {
373            name.givenNames = tokens.mTokens[tokens.mStartPointer];
374        } else {
375            parseLastName(name, tokens);
376            parseMiddleName(name, tokens);
377            parseGivenNames(name, tokens);
378        }
379    }
380
381    /**
382     * Splits a full name composed according to the Chinese tradition:
383     * <pre>
384     *   [family name [middle name]] given name
385     * </pre>
386     */
387    private void splitChineseName(Name name, String fullName) {
388        StringTokenizer tokenizer = new StringTokenizer(fullName);
389        while (tokenizer.hasMoreTokens()) {
390            String token = tokenizer.nextToken();
391            if (name.givenNames == null) {
392                name.givenNames = token;
393            } else if (name.familyName == null) {
394                name.familyName = name.givenNames;
395                name.givenNames = token;
396            } else if (name.middleName == null) {
397                name.middleName = name.givenNames;
398                name.givenNames = token;
399            } else {
400                name.middleName = name.middleName + name.givenNames;
401                name.givenNames = token;
402            }
403        }
404
405        // If a single word parse that word up.
406        if (name.givenNames != null && name.familyName == null && name.middleName == null) {
407            int length = fullName.length();
408            if (length == 2) {
409                name.familyName = fullName.substring(0, 1);
410                name.givenNames = fullName.substring(1);
411            } else if (length == 3) {
412                name.familyName = fullName.substring(0, 1);
413                name.middleName = fullName.substring(1, 2);
414                name.givenNames = fullName.substring(2);
415            } else if (length == 4) {
416                name.familyName = fullName.substring(0, 2);
417                name.middleName = fullName.substring(2, 3);
418                name.givenNames = fullName.substring(3);
419            }
420
421        }
422    }
423
424    /**
425     * Splits a full name composed according to the Japanese tradition:
426     * <pre>
427     *   [family name] given name(s)
428     * </pre>
429     */
430    private void splitJapaneseOrKoreanName(Name name, String fullName) {
431        StringTokenizer tokenizer = new StringTokenizer(fullName);
432        while (tokenizer.hasMoreTokens()) {
433            String token = tokenizer.nextToken();
434            if (name.givenNames == null) {
435                name.givenNames = token;
436            } else if (name.familyName == null) {
437                name.familyName = name.givenNames;
438                name.givenNames = token;
439            } else {
440                name.givenNames += " " + token;
441            }
442        }
443    }
444
445    /**
446     * Concatenates components of a name according to the rules dictated by the name style.
447     *
448     * @param givenNameFirst is ignored for CJK display name styles
449     */
450    public String join(Name name, boolean givenNameFirst) {
451        switch (name.fullNameStyle) {
452            case FullNameStyle.CJK:
453            case FullNameStyle.CHINESE:
454            case FullNameStyle.KOREAN:
455                return join(name.familyName, name.middleName, name.givenNames, name.suffix,
456                        false, false, false);
457
458            case FullNameStyle.JAPANESE:
459                return join(name.familyName, name.middleName, name.givenNames, name.suffix,
460                        true, false, false);
461
462            default:
463                if (givenNameFirst) {
464                    return join(name.givenNames, name.middleName, name.familyName, name.suffix,
465                            true, false, true);
466                } else {
467                    return join(name.familyName, name.givenNames, name.middleName, name.suffix,
468                            true, true, true);
469                }
470        }
471    }
472
473    /**
474     * Concatenates components of the phonetic name following the CJK tradition:
475     * family name + middle name + given name(s).
476     */
477    public String joinPhoneticName(Name name) {
478        return join(name.phoneticFamilyName, name.phoneticMiddleName,
479                name.phoneticGivenName, null, true, false, false);
480    }
481
482    /**
483     * Given a name in Chinese, returns a Pinyin representation.
484     */
485    public String convertHanziToPinyin(String name) {
486
487        // TODO: move this code to HanziToPinyin and optimize
488        ArrayList<Token> tokens = HanziToPinyin.getInstance().get(name);
489        if (tokens != null) {
490            int size = tokens.size();
491            if (size != 0) {
492                StringBuilder sb = new StringBuilder();
493                for (int i = 0; i < size; i++) {
494                    String pinyin = tokens.get(i).target;
495                    if (!TextUtils.isEmpty(pinyin)) {
496                        if (sb.length() != 0) {
497                            sb.append(' ');
498                        }
499                        sb.append(pinyin);
500                    }
501                }
502                return sb.toString();
503            }
504        }
505        return null;
506    }
507
508    /**
509     * Concatenates parts of a full name inserting spaces and commas as specified.
510     */
511    private String join(String part1, String part2, String part3, String suffix,
512            boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) {
513        boolean hasPart1 = !TextUtils.isEmpty(part1);
514        boolean hasPart2 = !TextUtils.isEmpty(part2);
515        boolean hasPart3 = !TextUtils.isEmpty(part3);
516        boolean hasSuffix = !TextUtils.isEmpty(suffix);
517
518        boolean isSingleWord = true;
519        String singleWord = null;
520        if (hasPart1) {
521            singleWord = part1;
522        }
523
524        if (hasPart2) {
525            if (singleWord != null) {
526                isSingleWord = false;
527            } else {
528                singleWord = part2;
529            }
530        }
531
532        if (hasPart3) {
533            if (singleWord != null) {
534                isSingleWord = false;
535            } else {
536                singleWord = part3;
537            }
538        }
539
540        if (hasSuffix) {
541            if (singleWord != null) {
542                isSingleWord = false;
543            } else {
544                singleWord = normalizedSuffix(suffix);
545            }
546        }
547
548        if (isSingleWord) {
549            return singleWord;
550        }
551
552        StringBuilder sb = new StringBuilder();
553        if (hasPart1) {
554            sb.append(part1);
555        }
556
557        if (hasPart2) {
558            if (hasPart1) {
559                if (useCommaAfterPart1) {
560                    sb.append(',');
561                }
562                if (useSpace) {
563                    sb.append(' ');
564                }
565            }
566            sb.append(part2);
567        }
568
569        if (hasPart3) {
570            if (hasPart1 || hasPart2) {
571                if (useSpace) {
572                    sb.append(' ');
573                }
574            }
575            sb.append(part3);
576        }
577
578        if (hasSuffix) {
579            if (hasPart1 || hasPart2 || hasPart3) {
580                if (useCommaAfterPart3) {
581                    sb.append(',');
582                }
583                if (useSpace) {
584                    sb.append(' ');
585                }
586            }
587            sb.append(normalizedSuffix(suffix));
588        }
589
590        return sb.toString();
591    }
592
593    /**
594     * Puts a dot after the supplied suffix if that is the accepted form of the suffix,
595     * e.g. "Jr." and "Sr.", but not "I", "II" and "III".
596     */
597    private String normalizedSuffix(String suffix) {
598        int length = suffix.length();
599        if (length == 0 || suffix.charAt(length - 1) == '.') {
600            return suffix;
601        }
602
603        String withDot = suffix + '.';
604        if (mSuffixesSet.contains(withDot.toUpperCase())) {
605            return withDot;
606        } else {
607            return suffix;
608        }
609    }
610
611    /**
612     * If the supplied name style is undefined, returns a default based on the language,
613     * otherwise returns the supplied name style itself.
614     *
615     * @param nameStyle See {@link FullNameStyle}.
616     */
617    public int getAdjustedFullNameStyle(int nameStyle) {
618        if (nameStyle == FullNameStyle.UNDEFINED) {
619            if (JAPANESE_LANGUAGE.equals(mLanguage)) {
620                return FullNameStyle.JAPANESE;
621            } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
622                return FullNameStyle.KOREAN;
623            } else if (CHINESE_LANGUAGE.equals(mLanguage)) {
624                return FullNameStyle.CHINESE;
625            } else {
626                return FullNameStyle.WESTERN;
627            }
628        } else if (nameStyle == FullNameStyle.CJK) {
629            if (JAPANESE_LANGUAGE.equals(mLanguage)) {
630                return FullNameStyle.JAPANESE;
631            } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
632                return FullNameStyle.KOREAN;
633            } else {
634                return FullNameStyle.CHINESE;
635            }
636        }
637        return nameStyle;
638    }
639
640    /**
641     * Parses the first word from the name if it is a prefix.
642     */
643    private void parsePrefix(Name name, NameTokenizer tokens) {
644        if (tokens.mStartPointer == tokens.mEndPointer) {
645            return;
646        }
647
648        String firstToken = tokens.mTokens[tokens.mStartPointer];
649        if (mPrefixesSet.contains(firstToken.toUpperCase())) {
650            name.prefix = firstToken;
651            tokens.mStartPointer++;
652        }
653    }
654
655    /**
656     * Parses the last word(s) from the name if it is a suffix.
657     */
658    private void parseSuffix(Name name, NameTokenizer tokens) {
659        if (tokens.mStartPointer == tokens.mEndPointer) {
660            return;
661        }
662
663        String lastToken = tokens.mTokens[tokens.mEndPointer - 1];
664        if (lastToken.length() > mMaxSuffixLength) {
665            return;
666        }
667
668        String normalized = lastToken.toUpperCase();
669        if (mSuffixesSet.contains(normalized)) {
670            name.suffix = lastToken;
671            tokens.mEndPointer--;
672            return;
673        }
674
675        if (tokens.hasDot(tokens.mEndPointer - 1)) {
676            lastToken += '.';
677        }
678        normalized += ".";
679
680        // Take care of suffixes like M.D. and D.D.S.
681        int pos = tokens.mEndPointer - 1;
682        while (normalized.length() <= mMaxSuffixLength) {
683
684            if (mSuffixesSet.contains(normalized)) {
685                name.suffix = lastToken;
686                tokens.mEndPointer = pos;
687                return;
688            }
689
690            if (pos == tokens.mStartPointer) {
691                break;
692            }
693
694            pos--;
695            if (tokens.hasDot(pos)) {
696                lastToken = tokens.mTokens[pos] + "." + lastToken;
697            } else {
698                lastToken = tokens.mTokens[pos] + " " + lastToken;
699            }
700
701            normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized;
702        }
703    }
704
705    private void parseLastName(Name name, NameTokenizer tokens) {
706        if (tokens.mStartPointer == tokens.mEndPointer) {
707            return;
708        }
709
710        // If the first word is followed by a comma, assume that it's the family name
711        if (tokens.hasComma(tokens.mStartPointer)) {
712           name.familyName = tokens.mTokens[tokens.mStartPointer];
713           tokens.mStartPointer++;
714           return;
715        }
716
717        // If the second word is followed by a comma and the first word
718        // is a last name prefix as in "de Sade" and "von Cliburn", treat
719        // the first two words as the family name.
720        if (tokens.mStartPointer + 1 < tokens.mEndPointer
721                && tokens.hasComma(tokens.mStartPointer + 1)
722                && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) {
723            String familyNamePrefix = tokens.mTokens[tokens.mStartPointer];
724            if (tokens.hasDot(tokens.mStartPointer)) {
725                familyNamePrefix += '.';
726            }
727            name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1];
728            tokens.mStartPointer += 2;
729            return;
730        }
731
732        // Finally, assume that the last word is the last name
733        name.familyName = tokens.mTokens[tokens.mEndPointer - 1];
734        tokens.mEndPointer--;
735
736        // Take care of last names like "de Sade" and "von Cliburn"
737        if ((tokens.mEndPointer - tokens.mStartPointer) > 0) {
738            String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1];
739            if (isFamilyNamePrefix(lastNamePrefix)) {
740                if (tokens.hasDot(tokens.mEndPointer - 1)) {
741                    lastNamePrefix += '.';
742                }
743                name.familyName = lastNamePrefix + " " + name.familyName;
744                tokens.mEndPointer--;
745            }
746        }
747    }
748
749    /**
750     * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de"
751     */
752    private boolean isFamilyNamePrefix(String word) {
753        final String normalized = word.toUpperCase();
754
755        return mLastNamePrefixesSet.contains(normalized)
756                || mLastNamePrefixesSet.contains(normalized + ".");
757    }
758
759
760    private void parseMiddleName(Name name, NameTokenizer tokens) {
761        if (tokens.mStartPointer == tokens.mEndPointer) {
762            return;
763        }
764
765        if ((tokens.mEndPointer - tokens.mStartPointer) > 1) {
766            if ((tokens.mEndPointer - tokens.mStartPointer) == 2
767                    || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2].
768                            toUpperCase())) {
769                name.middleName = tokens.mTokens[tokens.mEndPointer - 1];
770                if (tokens.hasDot(tokens.mEndPointer - 1)) {
771                    name.middleName += '.';
772                }
773                tokens.mEndPointer--;
774            }
775        }
776    }
777
778    private void parseGivenNames(Name name, NameTokenizer tokens) {
779        if (tokens.mStartPointer == tokens.mEndPointer) {
780            return;
781        }
782
783        if ((tokens.mEndPointer - tokens.mStartPointer) == 1) {
784            name.givenNames = tokens.mTokens[tokens.mStartPointer];
785        } else {
786            StringBuilder sb = new StringBuilder();
787            for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) {
788                if (i != tokens.mStartPointer) {
789                    sb.append(' ');
790                }
791                sb.append(tokens.mTokens[i]);
792                if (tokens.hasDot(i)) {
793                    sb.append('.');
794                }
795            }
796            name.givenNames = sb.toString();
797        }
798    }
799
800    /**
801     * Makes the best guess at the expected full name style based on the character set
802     * used in the supplied name.  If the phonetic name is also supplied, tries to
803     * differentiate between Chinese, Japanese and Korean based on the alphabet used
804     * for the phonetic name.
805     */
806    public void guessNameStyle(Name name) {
807        guessFullNameStyle(name);
808        if (FullNameStyle.CJK == name.fullNameStyle) {
809            name.fullNameStyle = getAdjustedFullNameStyle(name.fullNameStyle);
810        }
811        guessPhoneticNameStyle(name);
812        name.fullNameStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(name.fullNameStyle,
813                name.phoneticNameStyle);
814    }
815
816    /**
817     * Updates the display name style according to the phonetic name style if we
818     * were unsure about display name style based on the name components, but
819     * phonetic name makes it more definitive.
820     */
821    public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) {
822        if (phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
823            if (nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK) {
824                if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) {
825                    return FullNameStyle.JAPANESE;
826                } else if (phoneticNameStyle == PhoneticNameStyle.KOREAN) {
827                    return FullNameStyle.KOREAN;
828                }
829                if (nameStyle == FullNameStyle.CJK && phoneticNameStyle == PhoneticNameStyle.PINYIN) {
830                    return FullNameStyle.CHINESE;
831                }
832            }
833        }
834        return nameStyle;
835    }
836
837    /**
838     * Makes the best guess at the expected full name style based on the character set
839     * used in the supplied name.
840     */
841    private void guessFullNameStyle(NameSplitter.Name name) {
842        if (name.fullNameStyle != FullNameStyle.UNDEFINED) {
843            return;
844        }
845
846        int bestGuess = guessFullNameStyle(name.givenNames);
847        // A mix of Hanzi and latin chars are common in China, so we have to go through all names
848        // if the name is not JANPANESE or KOREAN.
849        if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK
850                && bestGuess != FullNameStyle.WESTERN) {
851            name.fullNameStyle = bestGuess;
852            return;
853        }
854
855        int guess = guessFullNameStyle(name.familyName);
856        if (guess != FullNameStyle.UNDEFINED) {
857            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
858                name.fullNameStyle = guess;
859                return;
860            }
861            bestGuess = guess;
862        }
863
864        guess = guessFullNameStyle(name.middleName);
865        if (guess != FullNameStyle.UNDEFINED) {
866            if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
867                name.fullNameStyle = guess;
868                return;
869            }
870            bestGuess = guess;
871        }
872
873        name.fullNameStyle = bestGuess;
874    }
875
876    public int guessFullNameStyle(String name) {
877        if (name == null) {
878            return FullNameStyle.UNDEFINED;
879        }
880
881        int nameStyle = FullNameStyle.UNDEFINED;
882        int length = name.length();
883        int offset = 0;
884        while (offset < length) {
885            int codePoint = Character.codePointAt(name, offset);
886            if (Character.isLetter(codePoint)) {
887                UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
888
889                if (!isLatinUnicodeBlock(unicodeBlock)) {
890
891                    if (isCJKUnicodeBlock(unicodeBlock)) {
892                        // We don't know if this is Chinese, Japanese or Korean -
893                        // trying to figure out by looking at other characters in the name
894                        return guessCJKNameStyle(name, offset + Character.charCount(codePoint));
895                    }
896
897                    if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
898                        return FullNameStyle.JAPANESE;
899                    }
900
901                    if (isKoreanUnicodeBlock(unicodeBlock)) {
902                        return FullNameStyle.KOREAN;
903                    }
904                }
905                nameStyle = FullNameStyle.WESTERN;
906            }
907            offset += Character.charCount(codePoint);
908        }
909        return nameStyle;
910    }
911
912    private int guessCJKNameStyle(String name, int offset) {
913        int length = name.length();
914        while (offset < length) {
915            int codePoint = Character.codePointAt(name, offset);
916            if (Character.isLetter(codePoint)) {
917                UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
918                if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
919                    return FullNameStyle.JAPANESE;
920                }
921                if (isKoreanUnicodeBlock(unicodeBlock)) {
922                    return FullNameStyle.KOREAN;
923                }
924            }
925            offset += Character.charCount(codePoint);
926        }
927
928        return FullNameStyle.CJK;
929    }
930
931    private void guessPhoneticNameStyle(NameSplitter.Name name) {
932        if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
933            return;
934        }
935
936        int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName);
937        if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) {
938            name.phoneticNameStyle = bestGuess;
939            return;
940        }
941
942        int guess = guessPhoneticNameStyle(name.phoneticGivenName);
943        if (guess != FullNameStyle.UNDEFINED) {
944            if (guess != FullNameStyle.CJK) {
945                name.phoneticNameStyle = guess;
946                return;
947            }
948            bestGuess = guess;
949        }
950
951        guess = guessPhoneticNameStyle(name.phoneticMiddleName);
952        if (guess != FullNameStyle.UNDEFINED) {
953            if (guess != FullNameStyle.CJK) {
954                name.phoneticNameStyle = guess;
955                return;
956            }
957            bestGuess = guess;
958        }
959    }
960
961    public int guessPhoneticNameStyle(String name) {
962        if (name == null) {
963            return PhoneticNameStyle.UNDEFINED;
964        }
965
966        int nameStyle = PhoneticNameStyle.UNDEFINED;
967        int length = name.length();
968        int offset = 0;
969        while (offset < length) {
970            int codePoint = Character.codePointAt(name, offset);
971            if (Character.isLetter(codePoint)) {
972                UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
973                if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
974                    return PhoneticNameStyle.JAPANESE;
975                }
976                if (isKoreanUnicodeBlock(unicodeBlock)) {
977                    return PhoneticNameStyle.KOREAN;
978                }
979                if (isLatinUnicodeBlock(unicodeBlock)) {
980                    return PhoneticNameStyle.PINYIN;
981                }
982            }
983            offset += Character.charCount(codePoint);
984        }
985
986        return nameStyle;
987    }
988
989    private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
990        return unicodeBlock == UnicodeBlock.BASIC_LATIN ||
991                unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT ||
992                unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A ||
993                unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B ||
994                unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
995    }
996
997    private static boolean isCJKUnicodeBlock(UnicodeBlock block) {
998        return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
999                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1000                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1001                || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
1002                || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT
1003                || block == UnicodeBlock.CJK_COMPATIBILITY
1004                || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
1005                || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
1006                || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
1007    }
1008
1009    private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) {
1010        return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES ||
1011                unicodeBlock == UnicodeBlock.HANGUL_JAMO ||
1012                unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
1013    }
1014
1015    private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) {
1016        return unicodeBlock == UnicodeBlock.KATAKANA ||
1017                unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS ||
1018                unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS ||
1019                unicodeBlock == UnicodeBlock.HIRAGANA;
1020    }
1021}
1022