1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.dialer.dialpad;
18
19import android.text.TextUtils;
20
21import com.android.dialer.dialpad.SmartDialCache.ContactNumber;
22import com.google.common.annotations.VisibleForTesting;
23import com.google.common.base.Preconditions;
24import com.google.common.collect.Lists;
25
26import java.util.ArrayList;
27import java.util.HashSet;
28import java.util.Set;
29
30/**
31 * Prefix trie where the only allowed characters are the characters '0' to '9'. Multiple contacts
32 * can occupy the same nodes.
33 *
34 * <p>Provides functions to get all contacts that lie on or below a node.
35 * This is useful for retrieving all contacts that start with that prefix.</p>
36 *
37 * <p>Also contains special logic to handle NANP numbers in the case that the user is from a region
38 * that uses NANP numbers.</p>
39 */
40public class SmartDialTrie {
41    @VisibleForTesting
42    static class ParseInfo {
43        byte[] indexes;
44        int nthFirstTokenPos;
45        int nthLastTokenPos;
46    }
47
48    /**
49     * A country code and integer offset pair that represents the parsed country code in a
50     * phone number. The country code is a string containing the numeric country-code prefix in
51     * a phone number (e.g. 1 or 852). The offset is the integer position of where the country code
52     * ends in a phone number.
53     */
54    public static class CountryCodeWithOffset {
55        public static final CountryCodeWithOffset NO_COUNTRY_CODE = new CountryCodeWithOffset(0,
56                "");
57
58        final String countryCode;
59        final int offset;
60
61        public CountryCodeWithOffset(int offset, String countryCode) {
62            this.countryCode = countryCode;
63            this.offset = offset;
64        }
65    }
66
67    final Node mRoot = new Node();
68    private int mSize = 0;
69    private final char[] mCharacterMap;
70    private final boolean mFormatNanp;
71
72    private static final int LAST_TOKENS_FOR_INITIALS = 2;
73    private static final int FIRST_TOKENS_FOR_INITIALS = 2;
74
75    // Static set of all possible country codes in the world
76    public static Set<String> sCountryCodes = null;
77
78    public SmartDialTrie() {
79        // Use the latin letter to digit map by default if none provided
80        this(SmartDialNameMatcher.LATIN_LETTERS_TO_DIGITS, false);
81    }
82
83    /**
84     * Creates a new SmartDialTrie.
85     *
86     * @param formatNanp True if inserted numbers are to be treated as NANP numbers
87     * such that numbers are automatically broken up by country prefix and area code.
88     */
89    @VisibleForTesting
90    public SmartDialTrie(boolean formatNanp) {
91        this(SmartDialNameMatcher.LATIN_LETTERS_TO_DIGITS, formatNanp);
92    }
93
94    /**
95     * Creates a new SmartDialTrie.
96     *
97     * @param charMap Mapping of characters to digits to use when inserting names into the trie.
98     * @param formatNanp True if inserted numbers are to be treated as NANP numbers
99     * such that numbers are automatically broken up by country prefix and area code.
100     */
101    public SmartDialTrie(char[] charMap, boolean formatNanp) {
102        mCharacterMap = charMap;
103        mFormatNanp = formatNanp;
104    }
105
106    /**
107     * Returns all contacts in the prefix tree that correspond to this prefix.
108     */
109    public ArrayList<ContactNumber> getAllWithPrefix(CharSequence prefix) {
110        final ArrayList<ContactNumber> result = Lists.newArrayList();
111        if (TextUtils.isEmpty(prefix)) {
112            return result;
113        }
114        Node current = mRoot;
115        for (int i = 0; i < prefix.length(); i++) {
116            char ch = prefix.charAt(i);
117            current = current.getChild(ch, false);
118            if (current == null) {
119                return result;
120            }
121        }
122        // return all contacts that correspond to this prefix
123        getAll(current, result);
124        return result;
125    }
126
127    /**
128     * Returns all the contacts located at and under the provided node(including its children)
129     */
130    private void getAll(Node root, ArrayList<ContactNumber> output) {
131        if (root == null) {
132            return;
133        }
134        if (root.getContents() != null) {
135            output.addAll(root.getContents());
136        }
137        for (int i = 0; i < root.getChildrenSize(); i++) {
138            getAll(root.getChild(i, false), output);
139        }
140    }
141
142    /**
143     * Adds the display name and phone number of a contact into the prefix trie.
144     *
145     * @param contact Desired contact to add
146     */
147    public void put(ContactNumber contact) {
148        // Preconvert the display name into a byte array containing indexes to avoid having to
149        // remap each character over multiple passes
150        final ParseInfo info = parseToIndexes(contact.displayName, FIRST_TOKENS_FOR_INITIALS,
151                LAST_TOKENS_FOR_INITIALS);
152        putForPrefix(contact, mRoot, info, 0, true);
153        // We don't need to do the same for phone numbers since we only make one pass over them.
154        // Strip the calling code from the phone number here
155        if (!TextUtils.isEmpty(contact.phoneNumber)) {
156            // Handle country codes for numbers with a + prefix
157            final CountryCodeWithOffset code = getOffsetWithoutCountryCode(contact.phoneNumber);
158            if (code.offset != 0) {
159                putNumber(contact, contact.phoneNumber, code.offset);
160            }
161            if ((code.countryCode.equals("1") || code.offset == 0) && mFormatNanp) {
162                // Special case handling for NANP numbers (1-xxx-xxx-xxxx)
163                final String stripped = SmartDialNameMatcher.normalizeNumber(
164                        contact.phoneNumber, code.offset);
165                if (!TextUtils.isEmpty(stripped)) {
166                    int trunkPrefixOffset = 0;
167                    if (stripped.charAt(0) == '1') {
168                        // If the number starts with 1, we can assume its the trunk prefix.
169                        trunkPrefixOffset = 1;
170                    }
171                    if (stripped.length() == (10 + trunkPrefixOffset)) {
172                        // Valid NANP number
173                        if (trunkPrefixOffset != 0) {
174                            // Add the digits that follow the 1st digit (trunk prefix)
175                            // If trunkPrefixOffset is 0, we will add the number as is anyway, so
176                            // don't bother.
177                            putNumber(contact, stripped, trunkPrefixOffset);
178                        }
179                        // Add the digits that follow the next 3 digits (area code)
180                        putNumber(contact, stripped, 3 + trunkPrefixOffset);
181                    }
182                }
183            }
184            putNumber(contact, contact.phoneNumber, 0);
185        }
186        mSize++;
187    }
188
189    public static CountryCodeWithOffset getOffsetWithoutCountryCode(String number) {
190        if (!TextUtils.isEmpty(number)) {
191            if (number.charAt(0) == '+') {
192                // check for international code here
193                for (int i = 1; i <= 1 + 3; i++) {
194                    if (number.length() <= i) break;
195                    final String countryCode = number.substring(1, i);
196                    if (isValidCountryCode(countryCode)) {
197                        return new CountryCodeWithOffset(i, countryCode);
198                    }
199                }
200            }
201        }
202        return CountryCodeWithOffset.NO_COUNTRY_CODE;
203    }
204
205    /**
206     * Used by SmartDialNameMatcher to determine which character in the phone number to start
207     * the matching process from for a NANP formatted number.
208     *
209     * @param number Raw phone number
210     * @return An empty array if the provided number does not appear to be an NANP number,
211     * and an array containing integer offsets for the number (starting after the '1' prefix,
212     * and the area code prefix respectively.
213     */
214    public static int[] getOffsetForNANPNumbers(String number) {
215        int validDigits = 0;
216        boolean hasPrefix = false;
217        int firstOffset = 0; // Tracks the location of the first digit after the '1' prefix
218        int secondOffset = 0; // Tracks the location of the first digit after the area code
219        for (int i = 0; i < number.length(); i++) {
220            final char ch = number.charAt(i);
221            if (ch >= '0' && ch <= '9') {
222                if (validDigits == 0) {
223                    // Check the first digit to see if it is 1
224                    if (ch == '1') {
225                        if (hasPrefix) {
226                            // Prefix has two '1's in a row. Invalid number, since area codes
227                            // cannot start with 1, so just bail
228                            break;
229                        }
230                        hasPrefix = true;
231                        continue;
232                    }
233                }
234                validDigits++;
235                if (validDigits == 1) {
236                    // Found the first digit after the country code
237                    firstOffset = i;
238                } else if (validDigits == 4) {
239                    // Found the first digit after the area code
240                    secondOffset = i;
241                }
242            }
243
244        }
245        if (validDigits == 10) {
246            return hasPrefix ? new int[] {firstOffset, secondOffset} : new int[] {secondOffset};
247        }
248        return new int[0];
249    }
250
251    /**
252     * Converts the given characters into a byte array of index and returns it together with offset
253     * information in a {@link ParseInfo} data structure.
254     * @param chars Characters to convert into indexes
255     * @param firstNTokens The first n tokens we want the offset for
256     * @param lastNTokens The last n tokens we want the offset for
257     */
258    @VisibleForTesting
259    ParseInfo parseToIndexes(CharSequence chars, int firstNTokens, int lastNTokens) {
260        final int length = chars.length();
261        final byte[] result = new byte[length];
262        final ArrayList<Integer> offSets = new ArrayList<Integer>();
263        char c;
264        int tokenCount = 0;
265        boolean atSeparator = true;
266        for (int i = 0; i < length; i++) {
267            c = SmartDialNameMatcher.remapAccentedChars(chars.charAt(i));
268            if (c >= 'a' && c <= 'z' || c >= '0' && c <= '9') {
269                if (atSeparator) {
270                    tokenCount++;
271                }
272                atSeparator = false;
273                if (c <= '9') {
274                    // 0-9
275                    result[i] = (byte) (c - '0');
276                } else {
277                    // a-z
278                    result[i] = (byte) (mCharacterMap[c - 'a'] - '0');
279                }
280            } else {
281                // Found the last character of the current token
282                if (!atSeparator) {
283                    offSets.add(i);
284                }
285                result[i] = -1;
286                atSeparator = true;
287            }
288        }
289
290        final ParseInfo info = new ParseInfo();
291        info.indexes = result;
292        info.nthFirstTokenPos = offSets.size() >= firstNTokens ? offSets.get(firstNTokens - 1) :
293                length - 1;
294        info.nthLastTokenPos = offSets.size() >= lastNTokens ? offSets.get(offSets.size() -
295                lastNTokens) : 0;
296        return info;
297    }
298
299    /**
300     * Puts a phone number and its associated contact into the prefix trie.
301     *
302     * @param contact - Contact to add to the trie
303     * @param phoneNumber - Phone number of the contact
304     * @param offSet - The nth character of the phone number to start from
305     */
306    private void putNumber(ContactNumber contact, String phoneNumber, int offSet) {
307        Preconditions.checkArgument(offSet < phoneNumber.length());
308        Node current = mRoot;
309        final int length = phoneNumber.length();
310        char ch;
311        for (int i = offSet; i < length; i++) {
312            ch = phoneNumber.charAt(i);
313            if (ch >= '0' && ch <= '9') {
314                current = current.getChild(ch, true);
315            }
316        }
317        current.add(contact);
318    }
319
320    /**
321     * Place an contact into the trie using at the provided node using the provided prefix. Uses as
322     * the input prefix a byte array instead of a CharSequence, as we will be traversing the array
323     * multiple times and it is more efficient to pre-convert the prefix into indexes before hand.
324     * Adds initial matches for the first token, and the last N tokens in the name.
325     *
326     * @param contact Contact to put
327     * @param root Root node to use as the starting point
328     * @param parseInfo ParseInfo data structure containing the converted byte array, as well as
329     *        token offsets that determine which tokens should have entries added for initial
330     *        search
331     * @param start - Starting index of the byte array
332     * @param isFullName If true, prefix will be treated as a full name and everytime a new name
333     *        token is encountered, the rest of the name segment is added into the trie.
334     */
335    private void putForPrefix(ContactNumber contact, Node root, ParseInfo info, int start,
336            boolean isFullName) {
337        final boolean addInitialMatches = (start >= info.nthLastTokenPos ||
338                start <= info.nthFirstTokenPos);
339        final byte[] indexes = info.indexes;
340        Node current = root;
341        Node initialNode = root;
342        final int length = indexes.length;
343        boolean atSeparator = true;
344        byte index;
345        for (int i = start; i < length; i++) {
346            index = indexes[i];
347            if (index > -1) {
348                if (atSeparator) {
349                    atSeparator = false;
350                    // encountered a new name token, so add this token into the tree starting from
351                    // the root node
352                    if (initialNode != this.mRoot) {
353                        if (isFullName) {
354                            putForPrefix(contact, this.mRoot, info, i, false);
355                        }
356                        if (addInitialMatches &&
357                                (i >= info.nthLastTokenPos || i <= info.nthFirstTokenPos) &&
358                                initialNode != root) {
359                            putForPrefix(contact, initialNode, info, i, false);
360                        }
361                    }
362                    // Set initial node to the node indexed by the first character of the current
363                    // prefix
364                    if (initialNode == root) {
365                        initialNode = initialNode.getChild(index, true);
366                    }
367                }
368                current = current.getChild(index, true);
369            } else {
370                atSeparator = true;
371            }
372        }
373        current.add(contact);
374    }
375
376    /* Used only for testing to verify we insert the correct number of entries into the trie */
377    @VisibleForTesting
378    int numEntries() {
379        final ArrayList<ContactNumber> result = Lists.newArrayList();
380        getAll(mRoot, result);
381        return result.size();
382    }
383
384
385    @VisibleForTesting
386    public int size() {
387        return mSize;
388    }
389
390    @VisibleForTesting
391    /* package */ static class Node {
392        Node[] mChildren;
393        private ArrayList<ContactNumber> mContents;
394
395        public Node() {
396            // don't allocate array or contents unless needed
397        }
398
399        public int getChildrenSize() {
400            if (mChildren == null) {
401                return -1;
402            }
403            return mChildren.length;
404        }
405
406        /**
407         * Returns a specific child of the current node.
408         *
409         * @param index Index of the child to return.
410         * @param createIfDoesNotExist Whether or not to create a node in that child slot if one
411         *        does not already currently exist.
412         * @return The existing or newly created child, or {@literal null} if the child does not
413         *         exist and createIfDoesNotExist is false.
414         */
415        public Node getChild(int index, boolean createIfDoesNotExist) {
416            if (createIfDoesNotExist) {
417                if (mChildren == null) {
418                    mChildren = new Node[10];
419                }
420                if (mChildren[index] == null) {
421                    mChildren[index] = new Node();
422                }
423            } else {
424                if (mChildren == null) {
425                    return null;
426                }
427            }
428            return mChildren[index];
429        }
430
431        /**
432         * Same as getChild(int index, boolean createIfDoesNotExist), but takes a character from '0'
433         * to '9' as an index.
434         */
435        public Node getChild(char index, boolean createIfDoesNotExist) {
436            return getChild(index - '0', createIfDoesNotExist);
437        }
438
439        public void add(ContactNumber contact) {
440            if (mContents == null) {
441                mContents = Lists.newArrayList();
442            }
443            mContents.add(contact);
444        }
445
446        public ArrayList<ContactNumber> getContents() {
447            return mContents;
448        }
449    }
450
451    private static boolean isValidCountryCode(String countryCode) {
452        if (sCountryCodes == null) {
453            sCountryCodes = initCountryCodes();
454        }
455        return sCountryCodes.contains(countryCode);
456    }
457
458    private static Set<String> initCountryCodes() {
459        final HashSet<String> result = new HashSet<String>();
460        result.add("1");
461        result.add("7");
462        result.add("20");
463        result.add("27");
464        result.add("30");
465        result.add("31");
466        result.add("32");
467        result.add("33");
468        result.add("34");
469        result.add("36");
470        result.add("39");
471        result.add("40");
472        result.add("41");
473        result.add("43");
474        result.add("44");
475        result.add("45");
476        result.add("46");
477        result.add("47");
478        result.add("48");
479        result.add("49");
480        result.add("51");
481        result.add("52");
482        result.add("53");
483        result.add("54");
484        result.add("55");
485        result.add("56");
486        result.add("57");
487        result.add("58");
488        result.add("60");
489        result.add("61");
490        result.add("62");
491        result.add("63");
492        result.add("64");
493        result.add("65");
494        result.add("66");
495        result.add("81");
496        result.add("82");
497        result.add("84");
498        result.add("86");
499        result.add("90");
500        result.add("91");
501        result.add("92");
502        result.add("93");
503        result.add("94");
504        result.add("95");
505        result.add("98");
506        result.add("211");
507        result.add("212");
508        result.add("213");
509        result.add("216");
510        result.add("218");
511        result.add("220");
512        result.add("221");
513        result.add("222");
514        result.add("223");
515        result.add("224");
516        result.add("225");
517        result.add("226");
518        result.add("227");
519        result.add("228");
520        result.add("229");
521        result.add("230");
522        result.add("231");
523        result.add("232");
524        result.add("233");
525        result.add("234");
526        result.add("235");
527        result.add("236");
528        result.add("237");
529        result.add("238");
530        result.add("239");
531        result.add("240");
532        result.add("241");
533        result.add("242");
534        result.add("243");
535        result.add("244");
536        result.add("245");
537        result.add("246");
538        result.add("247");
539        result.add("248");
540        result.add("249");
541        result.add("250");
542        result.add("251");
543        result.add("252");
544        result.add("253");
545        result.add("254");
546        result.add("255");
547        result.add("256");
548        result.add("257");
549        result.add("258");
550        result.add("260");
551        result.add("261");
552        result.add("262");
553        result.add("263");
554        result.add("264");
555        result.add("265");
556        result.add("266");
557        result.add("267");
558        result.add("268");
559        result.add("269");
560        result.add("290");
561        result.add("291");
562        result.add("297");
563        result.add("298");
564        result.add("299");
565        result.add("350");
566        result.add("351");
567        result.add("352");
568        result.add("353");
569        result.add("354");
570        result.add("355");
571        result.add("356");
572        result.add("357");
573        result.add("358");
574        result.add("359");
575        result.add("370");
576        result.add("371");
577        result.add("372");
578        result.add("373");
579        result.add("374");
580        result.add("375");
581        result.add("376");
582        result.add("377");
583        result.add("378");
584        result.add("379");
585        result.add("380");
586        result.add("381");
587        result.add("382");
588        result.add("385");
589        result.add("386");
590        result.add("387");
591        result.add("389");
592        result.add("420");
593        result.add("421");
594        result.add("423");
595        result.add("500");
596        result.add("501");
597        result.add("502");
598        result.add("503");
599        result.add("504");
600        result.add("505");
601        result.add("506");
602        result.add("507");
603        result.add("508");
604        result.add("509");
605        result.add("590");
606        result.add("591");
607        result.add("592");
608        result.add("593");
609        result.add("594");
610        result.add("595");
611        result.add("596");
612        result.add("597");
613        result.add("598");
614        result.add("599");
615        result.add("670");
616        result.add("672");
617        result.add("673");
618        result.add("674");
619        result.add("675");
620        result.add("676");
621        result.add("677");
622        result.add("678");
623        result.add("679");
624        result.add("680");
625        result.add("681");
626        result.add("682");
627        result.add("683");
628        result.add("685");
629        result.add("686");
630        result.add("687");
631        result.add("688");
632        result.add("689");
633        result.add("690");
634        result.add("691");
635        result.add("692");
636        result.add("800");
637        result.add("808");
638        result.add("850");
639        result.add("852");
640        result.add("853");
641        result.add("855");
642        result.add("856");
643        result.add("870");
644        result.add("878");
645        result.add("880");
646        result.add("881");
647        result.add("882");
648        result.add("883");
649        result.add("886");
650        result.add("888");
651        result.add("960");
652        result.add("961");
653        result.add("962");
654        result.add("963");
655        result.add("964");
656        result.add("965");
657        result.add("966");
658        result.add("967");
659        result.add("968");
660        result.add("970");
661        result.add("971");
662        result.add("972");
663        result.add("973");
664        result.add("974");
665        result.add("975");
666        result.add("976");
667        result.add("977");
668        result.add("979");
669        result.add("992");
670        result.add("993");
671        result.add("994");
672        result.add("995");
673        result.add("996");
674        result.add("998");
675        return result;
676    }
677}
678