1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16
17package com.android.providers.contacts;
18
19import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType;
20
21import android.provider.ContactsContract.FullNameStyle;
22
23import java.util.Arrays;
24import java.util.Comparator;
25import java.util.Iterator;
26
27/**
28 * Given a full name, constructs all possible variants of the name.
29 */
30public abstract class NameLookupBuilder {
31
32    private static final int MAX_NAME_TOKENS = 4;
33
34    private final NameSplitter mSplitter;
35    private String[][] mNicknameClusters = new String[MAX_NAME_TOKENS][];
36    private StringBuilder mStringBuilder = new StringBuilder();
37    private String[] mNames = new String[NameSplitter.MAX_TOKENS];
38
39    private static int[] KOREAN_JAUM_CONVERT_MAP = {
40        // JAUM in Hangul Compatibility Jamo area 0x3131 ~ 0x314E to
41        // in Hangul Jamo area 0x1100 ~ 0x1112
42        0x1100, // 0x3131 HANGUL LETTER KIYEOK
43        0x1101, // 0x3132 HANGUL LETTER SSANGKIYEOK
44        0x00,   // 0x3133 HANGUL LETTER KIYEOKSIOS (Ignored)
45        0x1102, // 0x3134 HANGUL LETTER NIEUN
46        0x00,   // 0x3135 HANGUL LETTER NIEUNCIEUC (Ignored)
47        0x00,   // 0x3136 HANGUL LETTER NIEUNHIEUH (Ignored)
48        0x1103, // 0x3137 HANGUL LETTER TIKEUT
49        0x1104, // 0x3138 HANGUL LETTER SSANGTIKEUT
50        0x1105, // 0x3139 HANGUL LETTER RIEUL
51        0x00,   // 0x313A HANGUL LETTER RIEULKIYEOK (Ignored)
52        0x00,   // 0x313B HANGUL LETTER RIEULMIEUM (Ignored)
53        0x00,   // 0x313C HANGUL LETTER RIEULPIEUP (Ignored)
54        0x00,   // 0x313D HANGUL LETTER RIEULSIOS (Ignored)
55        0x00,   // 0x313E HANGUL LETTER RIEULTHIEUTH (Ignored)
56        0x00,   // 0x313F HANGUL LETTER RIEULPHIEUPH (Ignored)
57        0x00,   // 0x3140 HANGUL LETTER RIEULHIEUH (Ignored)
58        0x1106, // 0x3141 HANGUL LETTER MIEUM
59        0x1107, // 0x3142 HANGUL LETTER PIEUP
60        0x1108, // 0x3143 HANGUL LETTER SSANGPIEUP
61        0x00,   // 0x3144 HANGUL LETTER PIEUPSIOS (Ignored)
62        0x1109, // 0x3145 HANGUL LETTER SIOS
63        0x110A, // 0x3146 HANGUL LETTER SSANGSIOS
64        0x110B, // 0x3147 HANGUL LETTER IEUNG
65        0x110C, // 0x3148 HANGUL LETTER CIEUC
66        0x110D, // 0x3149 HANGUL LETTER SSANGCIEUC
67        0x110E, // 0x314A HANGUL LETTER CHIEUCH
68        0x110F, // 0x314B HANGUL LETTER KHIEUKH
69        0x1110, // 0x314C HANGUL LETTER THIEUTH
70        0x1111, // 0x314D HANGUL LETTER PHIEUPH
71        0x1112  // 0x314E HANGUL LETTER HIEUH
72    };
73    private static int KOREAN_JAUM_CONVERT_MAP_COUNT = 30;
74
75
76    public NameLookupBuilder(NameSplitter splitter) {
77        mSplitter = splitter;
78    }
79
80    /**
81     * Inserts a name lookup record with the supplied column values.
82     */
83    protected abstract void insertNameLookup(long rawContactId, long dataId, int lookupType,
84            String string);
85
86    /**
87     * Returns common nickname cluster IDs for a given name. For example, it
88     * will return the same value for "Robert", "Bob" and "Rob". Some names belong to multiple
89     * clusters, e.g. Leo could be Leonard or Leopold.
90     *
91     * May return null.
92     *
93     * @param normalizedName A normalized first name, see {@link NameNormalizer#normalize}.
94     */
95    protected abstract String[] getCommonNicknameClusters(String normalizedName);
96
97    /**
98     * Inserts name lookup records for the given structured name.
99     */
100    public void insertNameLookup(long rawContactId, long dataId, String name, int fullNameStyle) {
101        int tokenCount = mSplitter.tokenize(mNames, name);
102        if (tokenCount == 0) {
103            return;
104        }
105
106        for (int i = 0; i < tokenCount; i++) {
107            mNames[i] = normalizeName(mNames[i]);
108        }
109
110        boolean tooManyTokens = tokenCount > MAX_NAME_TOKENS;
111        if (tooManyTokens) {
112            insertNameVariant(rawContactId, dataId, tokenCount, NameLookupType.NAME_EXACT, true);
113
114            // Favor longer parts of the name
115            Arrays.sort(mNames, 0, tokenCount, new Comparator<String>() {
116
117                public int compare(String s1, String s2) {
118                    return s2.length() - s1.length();
119                }
120            });
121
122            // Insert a collation key for each extra word - useful for contact filtering
123            // and suggestions
124            String firstToken = mNames[0];
125            for (int i = MAX_NAME_TOKENS; i < tokenCount; i++) {
126                mNames[0] = mNames[i];
127                insertCollationKey(rawContactId, dataId, MAX_NAME_TOKENS);
128            }
129            mNames[0] = firstToken;
130
131            tokenCount = MAX_NAME_TOKENS;
132        }
133
134        // Phase I: insert all variants not involving nickname clusters
135        for (int i = 0; i < tokenCount; i++) {
136            mNicknameClusters[i] = getCommonNicknameClusters(mNames[i]);
137        }
138
139        insertNameVariants(rawContactId, dataId, 0, tokenCount, !tooManyTokens, true);
140        insertNicknamePermutations(rawContactId, dataId, 0, tokenCount);
141        insertNameShorthandLookup(rawContactId, dataId, name, fullNameStyle);
142        insertLocaleBasedSpecificLookup(rawContactId, dataId, name, fullNameStyle);
143    }
144
145    private void insertLocaleBasedSpecificLookup(long rawContactId, long dataId, String name,
146            int fullNameStyle) {
147        if (fullNameStyle == FullNameStyle.KOREAN) {
148            insertKoreanNameConsonantsLookup(rawContactId, dataId, name);
149        }
150    }
151
152    /**
153     * Inserts Korean lead consonants records of name for the given structured name.
154     */
155    private void insertKoreanNameConsonantsLookup(long rawContactId, long dataId, String name) {
156        int position = 0;
157        int consonantLength = 0;
158        int character;
159
160        final int stringLength = name.length();
161        mStringBuilder.setLength(0);
162        do {
163            character = name.codePointAt(position++);
164            if (character == 0x20) {
165                // Skip spaces.
166                continue;
167            }
168            // Exclude characters that are not in Korean leading consonants area
169            // and Korean characters area.
170            if ((character < 0x1100) || (character > 0x1112 && character < 0x3131) ||
171                    (character > 0x314E && character < 0xAC00) ||
172                    (character > 0xD7A3)) {
173                break;
174            }
175            // Decompose and take a only lead-consonant for composed Korean characters.
176            if (character >= 0xAC00) {
177                // Lead consonant = "Lead consonant base" +
178                //      (character - "Korean Character base") /
179                //          ("Lead consonant count" * "middle Vowel count")
180                character = 0x1100 + (character - 0xAC00) / 588;
181            } else if (character >= 0x3131) {
182                // Hangul Compatibility Jamo area 0x3131 ~ 0x314E :
183                // Convert to Hangul Jamo area 0x1100 ~ 0x1112
184                if (character - 0x3131 >= KOREAN_JAUM_CONVERT_MAP_COUNT) {
185                    // This is not lead-consonant
186                    break;
187                }
188                character = KOREAN_JAUM_CONVERT_MAP[character - 0x3131];
189                if (character == 0) {
190                    // This is not lead-consonant
191                    break;
192                }
193            }
194            mStringBuilder.appendCodePoint(character);
195            consonantLength++;
196        } while (position < stringLength);
197
198        // At least, insert consonants when Korean characters are two or more.
199        // Only one character cases are covered by NAME_COLLATION_KEY
200        if (consonantLength > 1) {
201            insertNameLookup(rawContactId, dataId, NameLookupType.NAME_CONSONANTS,
202                    normalizeName(mStringBuilder.toString()));
203        }
204    }
205
206    protected String normalizeName(String name) {
207        return NameNormalizer.normalize(name);
208    }
209
210    /**
211     * Inserts all name variants based on permutations of tokens between
212     * fromIndex and toIndex
213     *
214     * @param initiallyExact true if the name without permutations is the exact
215     *            original name
216     * @param buildCollationKey true if a collation key makes sense for these
217     *            permutations (false if at least one of the tokens is a
218     *            nickname cluster key)
219     */
220    private void insertNameVariants(long rawContactId, long dataId, int fromIndex, int toIndex,
221            boolean initiallyExact, boolean buildCollationKey) {
222        if (fromIndex == toIndex) {
223            insertNameVariant(rawContactId, dataId, toIndex,
224                    initiallyExact ? NameLookupType.NAME_EXACT : NameLookupType.NAME_VARIANT,
225                    buildCollationKey);
226            return;
227        }
228
229        // Swap the first token with each other token (including itself, which is a no-op)
230        // and recursively insert all permutations for the remaining tokens
231        String firstToken = mNames[fromIndex];
232        for (int i = fromIndex; i < toIndex; i++) {
233            mNames[fromIndex] = mNames[i];
234            mNames[i] = firstToken;
235
236            insertNameVariants(rawContactId, dataId, fromIndex + 1, toIndex,
237                    initiallyExact && i == fromIndex, buildCollationKey);
238
239            mNames[i] = mNames[fromIndex];
240            mNames[fromIndex] = firstToken;
241        }
242    }
243
244    /**
245     * Inserts a single name variant and optionally its collation key counterpart.
246     */
247    private void insertNameVariant(long rawContactId, long dataId, int tokenCount,
248            int lookupType, boolean buildCollationKey) {
249        mStringBuilder.setLength(0);
250
251        for (int i = 0; i < tokenCount; i++) {
252            if (i != 0) {
253                mStringBuilder.append('.');
254            }
255            mStringBuilder.append(mNames[i]);
256        }
257
258        insertNameLookup(rawContactId, dataId, lookupType, mStringBuilder.toString());
259
260        if (buildCollationKey) {
261            insertCollationKey(rawContactId, dataId, tokenCount);
262        }
263    }
264
265    /**
266     * Inserts a collation key for the current contents of {@link #mNames}.
267     */
268    private void insertCollationKey(long rawContactId, long dataId, int tokenCount) {
269        mStringBuilder.setLength(0);
270
271        for (int i = 0; i < tokenCount; i++) {
272            mStringBuilder.append(mNames[i]);
273        }
274
275        insertNameLookup(rawContactId, dataId, NameLookupType.NAME_COLLATION_KEY,
276                mStringBuilder.toString());
277    }
278
279    /**
280     * For all tokens that correspond to nickname clusters, substitutes each cluster key
281     * and inserts all permutations with that key.
282     */
283    private void insertNicknamePermutations(long rawContactId, long dataId, int fromIndex,
284            int tokenCount) {
285        for (int i = fromIndex; i < tokenCount; i++) {
286            String[] clusters = mNicknameClusters[i];
287            if (clusters != null) {
288                String token = mNames[i];
289                for (int j = 0; j < clusters.length; j++) {
290                    mNames[i] = clusters[j];
291
292                    // Insert all permutations with this nickname cluster
293                    insertNameVariants(rawContactId, dataId, 0, tokenCount, false, false);
294
295                    // Repeat recursively for other nickname clusters
296                    insertNicknamePermutations(rawContactId, dataId, i + 1, tokenCount);
297                }
298                mNames[i] = token;
299            }
300        }
301    }
302
303    private void insertNameShorthandLookup(long rawContactId, long dataId, String name,
304            int fullNameStyle) {
305        Iterator<String> it =
306                ContactLocaleUtils.getIntance().getNameLookupKeys(name, fullNameStyle);
307        if (it != null) {
308            while (it.hasNext()) {
309                String key = it.next();
310                insertNameLookup(rawContactId, dataId, NameLookupType.NAME_SHORTHAND,
311                        normalizeName(key));
312            }
313        }
314    }
315}
316