1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16
17package com.android.providers.contacts;
18
19import android.provider.ContactsContract.FullNameStyle;
20
21import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType;
22import com.android.providers.contacts.SearchIndexManager.IndexBuilder;
23
24import java.util.Arrays;
25import java.util.Comparator;
26import java.util.Iterator;
27
28/**
29 * Given a full name, constructs all possible variants of the name.
30 */
31public abstract class NameLookupBuilder {
32
33    private static final int MAX_NAME_TOKENS = 4;
34
35    private final NameSplitter mSplitter;
36    private String[][] mNicknameClusters = new String[MAX_NAME_TOKENS][];
37    private StringBuilder mStringBuilder = new StringBuilder();
38    private String[] mNames = new String[NameSplitter.MAX_TOKENS];
39
40    private static final int[] KOREAN_JAUM_CONVERT_MAP = {
41        // JAUM in Hangul Compatibility Jamo area 0x3131 ~ 0x314E to
42        // in Hangul Jamo area 0x1100 ~ 0x1112
43        0x1100, // 0x3131 HANGUL LETTER KIYEOK
44        0x1101, // 0x3132 HANGUL LETTER SSANGKIYEOK
45        0x00,   // 0x3133 HANGUL LETTER KIYEOKSIOS (Ignored)
46        0x1102, // 0x3134 HANGUL LETTER NIEUN
47        0x00,   // 0x3135 HANGUL LETTER NIEUNCIEUC (Ignored)
48        0x00,   // 0x3136 HANGUL LETTER NIEUNHIEUH (Ignored)
49        0x1103, // 0x3137 HANGUL LETTER TIKEUT
50        0x1104, // 0x3138 HANGUL LETTER SSANGTIKEUT
51        0x1105, // 0x3139 HANGUL LETTER RIEUL
52        0x00,   // 0x313A HANGUL LETTER RIEULKIYEOK (Ignored)
53        0x00,   // 0x313B HANGUL LETTER RIEULMIEUM (Ignored)
54        0x00,   // 0x313C HANGUL LETTER RIEULPIEUP (Ignored)
55        0x00,   // 0x313D HANGUL LETTER RIEULSIOS (Ignored)
56        0x00,   // 0x313E HANGUL LETTER RIEULTHIEUTH (Ignored)
57        0x00,   // 0x313F HANGUL LETTER RIEULPHIEUPH (Ignored)
58        0x00,   // 0x3140 HANGUL LETTER RIEULHIEUH (Ignored)
59        0x1106, // 0x3141 HANGUL LETTER MIEUM
60        0x1107, // 0x3142 HANGUL LETTER PIEUP
61        0x1108, // 0x3143 HANGUL LETTER SSANGPIEUP
62        0x00,   // 0x3144 HANGUL LETTER PIEUPSIOS (Ignored)
63        0x1109, // 0x3145 HANGUL LETTER SIOS
64        0x110A, // 0x3146 HANGUL LETTER SSANGSIOS
65        0x110B, // 0x3147 HANGUL LETTER IEUNG
66        0x110C, // 0x3148 HANGUL LETTER CIEUC
67        0x110D, // 0x3149 HANGUL LETTER SSANGCIEUC
68        0x110E, // 0x314A HANGUL LETTER CHIEUCH
69        0x110F, // 0x314B HANGUL LETTER KHIEUKH
70        0x1110, // 0x314C HANGUL LETTER THIEUTH
71        0x1111, // 0x314D HANGUL LETTER PHIEUPH
72        0x1112  // 0x314E HANGUL LETTER HIEUH
73    };
74
75    public NameLookupBuilder(NameSplitter splitter) {
76        mSplitter = splitter;
77    }
78
79    /**
80     * Inserts a name lookup record with the supplied column values.
81     */
82    protected abstract void insertNameLookup(long rawContactId, long dataId, int lookupType,
83            String string);
84
85    /**
86     * Returns common nickname cluster IDs for a given name. For example, it
87     * will return the same value for "Robert", "Bob" and "Rob". Some names belong to multiple
88     * clusters, e.g. Leo could be Leonard or Leopold.
89     *
90     * May return null.
91     *
92     * @param normalizedName A normalized first name, see {@link NameNormalizer#normalize}.
93     */
94    protected abstract String[] getCommonNicknameClusters(String normalizedName);
95
96    /**
97     * Inserts name lookup records for the given structured name.
98     */
99    public void insertNameLookup(long rawContactId, long dataId, String name, int fullNameStyle) {
100        int tokenCount = mSplitter.tokenize(mNames, name);
101        if (tokenCount == 0) {
102            return;
103        }
104
105        for (int i = 0; i < tokenCount; i++) {
106            mNames[i] = normalizeName(mNames[i]);
107        }
108
109        boolean tooManyTokens = tokenCount > MAX_NAME_TOKENS;
110        if (tooManyTokens) {
111            insertNameVariant(rawContactId, dataId, tokenCount, NameLookupType.NAME_EXACT, true);
112
113            // Favor longer parts of the name
114            Arrays.sort(mNames, 0, tokenCount, new Comparator<String>() {
115
116                public int compare(String s1, String s2) {
117                    return s2.length() - s1.length();
118                }
119            });
120
121            // Insert a collation key for each extra word - useful for contact filtering
122            // and suggestions
123            String firstToken = mNames[0];
124            for (int i = MAX_NAME_TOKENS; i < tokenCount; i++) {
125                mNames[0] = mNames[i];
126                insertCollationKey(rawContactId, dataId, MAX_NAME_TOKENS);
127            }
128            mNames[0] = firstToken;
129
130            tokenCount = MAX_NAME_TOKENS;
131        }
132
133        // Phase I: insert all variants not involving nickname clusters
134        for (int i = 0; i < tokenCount; i++) {
135            mNicknameClusters[i] = getCommonNicknameClusters(mNames[i]);
136        }
137
138        insertNameVariants(rawContactId, dataId, 0, tokenCount, !tooManyTokens, true);
139        insertNicknamePermutations(rawContactId, dataId, 0, tokenCount);
140    }
141
142    public void appendToSearchIndex(IndexBuilder builder, String name, int fullNameStyle) {
143        int tokenCount = mSplitter.tokenize(mNames, name);
144        if (tokenCount == 0) {
145            return;
146        }
147
148        for (int i = 0; i < tokenCount; i++) {
149            builder.appendName(mNames[i]);
150        }
151
152        appendNameShorthandLookup(builder, name, fullNameStyle);
153        appendNameLookupForLocaleBasedName(builder, name, fullNameStyle);
154    }
155
156    /**
157     * Insert more name indexes according to locale specifies.
158     */
159    private void appendNameLookupForLocaleBasedName(IndexBuilder builder,
160            String fullName, int fullNameStyle) {
161        if (fullNameStyle == FullNameStyle.KOREAN) {
162            NameSplitter.Name name = new NameSplitter.Name();
163            mSplitter.split(name, fullName, fullNameStyle);
164            if (name.givenNames != null) {
165                builder.appendName(name.givenNames);
166                appendKoreanNameConsonantsLookup(builder, name.givenNames);
167            }
168            appendKoreanNameConsonantsLookup(builder, fullName);
169        }
170    }
171
172    /**
173     * Inserts Korean lead consonants records of name for the given structured name.
174     */
175    private void appendKoreanNameConsonantsLookup(IndexBuilder builder, String name) {
176        int position = 0;
177        int consonantLength = 0;
178        int character;
179
180        final int stringLength = name.length();
181        mStringBuilder.setLength(0);
182        do {
183            character = name.codePointAt(position++);
184            if ((character == 0x20) || (character == 0x2c) || (character == 0x2E)) {
185                // Skip spaces, commas and periods.
186                continue;
187            }
188            // Exclude characters that are not in Korean leading consonants area
189            // and Korean characters area.
190            if ((character < 0x1100) || (character > 0x1112 && character < 0x3131) ||
191                    (character > 0x314E && character < 0xAC00) ||
192                    (character > 0xD7A3)) {
193                break;
194            }
195            // Decompose and take a only lead-consonant for composed Korean characters.
196            if (character >= 0xAC00) {
197                // Lead consonant = "Lead consonant base" +
198                //      (character - "Korean Character base") /
199                //          ("Lead consonant count" * "middle Vowel count")
200                character = 0x1100 + (character - 0xAC00) / 588;
201            } else if (character >= 0x3131) {
202                // Hangul Compatibility Jamo area 0x3131 ~ 0x314E :
203                // Convert to Hangul Jamo area 0x1100 ~ 0x1112
204                if (character - 0x3131 >= KOREAN_JAUM_CONVERT_MAP.length) {
205                    // This is not lead-consonant
206                    break;
207                }
208                character = KOREAN_JAUM_CONVERT_MAP[character - 0x3131];
209                if (character == 0) {
210                    // This is not lead-consonant
211                    break;
212                }
213            }
214            mStringBuilder.appendCodePoint(character);
215            consonantLength++;
216        } while (position < stringLength);
217
218        // At least, insert consonants when Korean characters are two or more.
219        // Only one character cases are covered by NAME_COLLATION_KEY
220        if (consonantLength > 1) {
221            builder.appendName(mStringBuilder.toString());
222        }
223    }
224
225    protected String normalizeName(String name) {
226        return NameNormalizer.normalize(name);
227    }
228
229    /**
230     * Inserts all name variants based on permutations of tokens between
231     * fromIndex and toIndex
232     *
233     * @param initiallyExact true if the name without permutations is the exact
234     *            original name
235     * @param buildCollationKey true if a collation key makes sense for these
236     *            permutations (false if at least one of the tokens is a
237     *            nickname cluster key)
238     */
239    private void insertNameVariants(long rawContactId, long dataId, int fromIndex, int toIndex,
240            boolean initiallyExact, boolean buildCollationKey) {
241        if (fromIndex == toIndex) {
242            insertNameVariant(rawContactId, dataId, toIndex,
243                    initiallyExact ? NameLookupType.NAME_EXACT : NameLookupType.NAME_VARIANT,
244                    buildCollationKey);
245            return;
246        }
247
248        // Swap the first token with each other token (including itself, which is a no-op)
249        // and recursively insert all permutations for the remaining tokens
250        String firstToken = mNames[fromIndex];
251        for (int i = fromIndex; i < toIndex; i++) {
252            mNames[fromIndex] = mNames[i];
253            mNames[i] = firstToken;
254
255            insertNameVariants(rawContactId, dataId, fromIndex + 1, toIndex,
256                    initiallyExact && i == fromIndex, buildCollationKey);
257
258            mNames[i] = mNames[fromIndex];
259            mNames[fromIndex] = firstToken;
260        }
261    }
262
263    /**
264     * Inserts a single name variant and optionally its collation key counterpart.
265     */
266    private void insertNameVariant(long rawContactId, long dataId, int tokenCount,
267            int lookupType, boolean buildCollationKey) {
268        mStringBuilder.setLength(0);
269
270        for (int i = 0; i < tokenCount; i++) {
271            if (i != 0) {
272                mStringBuilder.append('.');
273            }
274            mStringBuilder.append(mNames[i]);
275        }
276
277        insertNameLookup(rawContactId, dataId, lookupType, mStringBuilder.toString());
278
279        if (buildCollationKey) {
280            insertCollationKey(rawContactId, dataId, tokenCount);
281        }
282    }
283
284    /**
285     * Inserts a collation key for the current contents of {@link #mNames}.
286     */
287    private void insertCollationKey(long rawContactId, long dataId, int tokenCount) {
288        mStringBuilder.setLength(0);
289
290        for (int i = 0; i < tokenCount; i++) {
291            mStringBuilder.append(mNames[i]);
292        }
293
294        insertNameLookup(rawContactId, dataId, NameLookupType.NAME_COLLATION_KEY,
295                mStringBuilder.toString());
296    }
297
298    /**
299     * For all tokens that correspond to nickname clusters, substitutes each cluster key
300     * and inserts all permutations with that key.
301     */
302    private void insertNicknamePermutations(long rawContactId, long dataId, int fromIndex,
303            int tokenCount) {
304        for (int i = fromIndex; i < tokenCount; i++) {
305            String[] clusters = mNicknameClusters[i];
306            if (clusters != null) {
307                String token = mNames[i];
308                for (int j = 0; j < clusters.length; j++) {
309                    mNames[i] = clusters[j];
310
311                    // Insert all permutations with this nickname cluster
312                    insertNameVariants(rawContactId, dataId, 0, tokenCount, false, false);
313
314                    // Repeat recursively for other nickname clusters
315                    insertNicknamePermutations(rawContactId, dataId, i + 1, tokenCount);
316                }
317                mNames[i] = token;
318            }
319        }
320    }
321
322    /**
323     * Insert more name indexes according to locale specifies for those locales
324     * for which we have alternative shorthand name methods (eg, Pinyin for
325     * Chinese, Romaji for Japanese).
326     */
327    public void appendNameShorthandLookup(IndexBuilder builder, String name, int fullNameStyle) {
328        Iterator<String> it =
329                ContactLocaleUtils.getInstance().getNameLookupKeys(name, fullNameStyle);
330        if (it != null) {
331            while (it.hasNext()) {
332                builder.appendName(it.next());
333            }
334        }
335    }
336}
337