1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16package com.android.providers.contacts;
17
18import android.content.ContentValues;
19import android.database.Cursor;
20import android.database.sqlite.SQLiteDatabase;
21import android.os.SystemClock;
22import android.provider.ContactsContract.CommonDataKinds.Email;
23import android.provider.ContactsContract.CommonDataKinds.Nickname;
24import android.provider.ContactsContract.CommonDataKinds.Organization;
25import android.provider.ContactsContract.CommonDataKinds.StructuredPostal;
26import android.provider.ContactsContract.Data;
27import android.provider.ContactsContract.ProviderStatus;
28import android.provider.ContactsContract.RawContacts;
29import android.text.TextUtils;
30import android.util.Log;
31
32import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
33import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns;
34import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
35import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns;
36import com.android.providers.contacts.ContactsDatabaseHelper.Tables;
37import com.google.android.collect.Lists;
38import com.google.common.annotations.VisibleForTesting;
39
40import java.util.ArrayList;
41import java.util.HashSet;
42import java.util.List;
43import java.util.Set;
44import java.util.regex.Pattern;
45
46/**
47 * Maintains a search index for comprehensive contact search.
48 */
49public class SearchIndexManager {
    /** Log tag used for every message emitted by this class. */
    private static final String TAG = "ContactsFTS";

    /** Whether verbose logging is enabled for {@link #TAG}; evaluated once when the class loads. */
    private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE);

    /** Name of the database property under which the search index version is stored. */
    public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index";
    /**
     * Index version this class produces. {@code updateIndex} rebuilds the index whenever the
     * stored property value differs from this number.
     */
    private static final int SEARCH_INDEX_VERSION = 1;
56
57    private static final class ContactIndexQuery {
58        public static final String[] COLUMNS = {
59                Data.CONTACT_ID,
60                MimetypesColumns.MIMETYPE,
61                Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5,
62                Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11,
63                Data.DATA12, Data.DATA13, Data.DATA14
64        };
65
66        public static final int MIMETYPE = 1;
67    }
68
69    public static class IndexBuilder {
70        public static final int SEPARATOR_SPACE = 0;
71        public static final int SEPARATOR_PARENTHESES = 1;
72        public static final int SEPARATOR_SLASH = 2;
73        public static final int SEPARATOR_COMMA = 3;
74
75        private StringBuilder mSbContent = new StringBuilder();
76        private StringBuilder mSbName = new StringBuilder();
77        private StringBuilder mSbTokens = new StringBuilder();
78        private StringBuilder mSbElementContent = new StringBuilder();
79        private HashSet<String> mUniqueElements = new HashSet<String>();
80        private Cursor mCursor;
81
82        void setCursor(Cursor cursor) {
83            this.mCursor = cursor;
84        }
85
86        void reset() {
87            mSbContent.setLength(0);
88            mSbTokens.setLength(0);
89            mSbName.setLength(0);
90            mSbElementContent.setLength(0);
91            mUniqueElements.clear();
92        }
93
94        public String getContent() {
95            return mSbContent.length() == 0 ? null : mSbContent.toString();
96        }
97
98        public String getName() {
99            return mSbName.length() == 0 ? null : mSbName.toString();
100        }
101
102        public String getTokens() {
103            return mSbTokens.length() == 0 ? null : mSbTokens.toString();
104        }
105
106        public String getString(String columnName) {
107            return mCursor.getString(mCursor.getColumnIndex(columnName));
108        }
109
110        public int getInt(String columnName) {
111            return mCursor.getInt(mCursor.getColumnIndex(columnName));
112        }
113
114        @Override
115        public String toString() {
116            return "Content: " + mSbContent + "\n Name: " + mSbTokens + "\n Tokens: " + mSbTokens;
117        }
118
119        public void commit() {
120            if (mSbElementContent.length() != 0) {
121                String content = mSbElementContent.toString().replace('\n', ' ');
122                if (!mUniqueElements.contains(content)) {
123                    if (mSbContent.length() != 0) {
124                        mSbContent.append('\n');
125                    }
126                    mSbContent.append(content);
127                    mUniqueElements.add(content);
128                }
129                mSbElementContent.setLength(0);
130            }
131        }
132
133        public void appendContentFromColumn(String columnName) {
134            appendContentFromColumn(columnName, SEPARATOR_SPACE);
135        }
136
137        public void appendContentFromColumn(String columnName, int format) {
138            appendContent(getString(columnName), format);
139        }
140
141        public void appendContent(String value) {
142            appendContent(value, SEPARATOR_SPACE);
143        }
144
145        private void appendContent(String value, int format) {
146            if (TextUtils.isEmpty(value)) {
147                return;
148            }
149
150            switch (format) {
151                case SEPARATOR_SPACE:
152                    if (mSbElementContent.length() > 0) {
153                        mSbElementContent.append(' ');
154                    }
155                    mSbElementContent.append(value);
156                    break;
157
158                case SEPARATOR_SLASH:
159                    mSbElementContent.append('/').append(value);
160                    break;
161
162                case SEPARATOR_PARENTHESES:
163                    if (mSbElementContent.length() > 0) {
164                        mSbElementContent.append(' ');
165                    }
166                    mSbElementContent.append('(').append(value).append(')');
167                    break;
168
169                case SEPARATOR_COMMA:
170                    if (mSbElementContent.length() > 0) {
171                        mSbElementContent.append(", ");
172                    }
173                    mSbElementContent.append(value);
174                    break;
175            }
176        }
177
178        public void appendToken(String token) {
179            if (TextUtils.isEmpty(token)) {
180                return;
181            }
182
183            if (mSbTokens.length() != 0) {
184                mSbTokens.append(' ');
185            }
186            mSbTokens.append(token);
187        }
188
189        public void appendNameFromColumn(String columnName) {
190            appendName(getString(columnName));
191        }
192
193        public void appendName(String name) {
194            if (TextUtils.isEmpty(name)) {
195                return;
196            }
197            // First, put the original name.
198            appendNameInternal(name);
199
200            // Then, if the name contains more than one FTS token, put each token into the index
201            // too.
202            //
203            // This is to make names with special characters searchable, such as "double-barrelled"
204            // "L'Image".
205            //
206            // Here's how it works:
207            // Because we "normalize" names when putting into the index, if we only put
208            // "double-barrelled", the index will only contain "doublebarrelled".
209            // Now, if the user searches for "double-barrelled", the searcher tokenizes it into
210            // two tokens, "double" and "barrelled".  The first one matches "doublebarrelled"
211            // but the second one doesn't (because we only do the prefix match), so
212            // "doublebarrelled" doesn't match.
213            // So, here, we put each token in a name into the index too.  In the case above,
214            // we put also "double" and "barrelled".
215            // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled"
216            // will all match "double-barrelled".
217            final List<String> nameParts = splitIntoFtsTokens(name);
218            if (nameParts.size() > 1) {
219                for (String namePart : nameParts) {
220                    if (!TextUtils.isEmpty(namePart)) {
221                        appendNameInternal(namePart);
222                    }
223                }
224            }
225        }
226
227        /**
228         * Normalize a name and add to {@link #mSbName}
229         */
230        private void appendNameInternal(String name) {
231            if (mSbName.length() != 0) {
232                mSbName.append(' ');
233            }
234            mSbName.append(NameNormalizer.normalize(name));
235        }
236    }
237
    // Provider used to look up data row handlers and to publish the provider status.
    private final ContactsProvider2 mContactsProvider;
    // Database helper obtained from the provider in the constructor.
    private final ContactsDatabaseHelper mDbHelper;
    // Scratch buffer reused by buildAndInsertIndex() to assemble the ORDER BY clause.
    private StringBuilder mSb = new StringBuilder();
    // Builder reused for every contact while (re)building index rows.
    private IndexBuilder mIndexBuilder = new IndexBuilder();
    // Scratch values reused by insertIndexRow() for every inserted row.
    private ContentValues mValues = new ContentValues();
    // NOTE(review): not referenced anywhere in the code visible in this class.
    private String[] mSelectionArgs1 = new String[1];
244
245    public SearchIndexManager(ContactsProvider2 contactsProvider) {
246        this.mContactsProvider = contactsProvider;
247        mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper();
248    }
249
250    public void updateIndex(boolean force) {
251        if (force) {
252            setSearchIndexVersion(0);
253        } else {
254            if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) {
255                return;
256            }
257        }
258        SQLiteDatabase db = mDbHelper.getWritableDatabase();
259        db.beginTransaction();
260        try {
261            // We do a version check again, because the version might have been modified after
262            // the first check.  We need to do the check again in a transaction to make sure.
263            if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) {
264                rebuildIndex(db);
265                setSearchIndexVersion(SEARCH_INDEX_VERSION);
266                db.setTransactionSuccessful();
267            }
268        } finally {
269            db.endTransaction();
270        }
271    }
272
273    private void rebuildIndex(SQLiteDatabase db) {
274        mContactsProvider.setProviderStatus(ProviderStatus.STATUS_UPGRADING);
275        final long start = SystemClock.elapsedRealtime();
276        int count = 0;
277        try {
278            mDbHelper.createSearchIndexTable(db, true);
279            count = buildAndInsertIndex(db, null);
280        } finally {
281            mContactsProvider.setProviderStatus(ProviderStatus.STATUS_NORMAL);
282
283            final long end = SystemClock.elapsedRealtime();
284            Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, "
285                    + count + " contacts");
286        }
287    }
288
289    public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) {
290        if (VERBOSE_LOGGING) {
291            Log.v(TAG, "Updating search index for " + contactIds.size() +
292                    " contacts / " + rawContactIds.size() + " raw contacts");
293        }
294        StringBuilder sb = new StringBuilder();
295        sb.append("(");
296        if (!contactIds.isEmpty()) {
297            // Select all raw contacts that belong to all contacts in contactIds
298            sb.append(RawContacts.CONTACT_ID + " IN (");
299            sb.append(TextUtils.join(",", contactIds));
300            sb.append(')');
301        }
302        if (!rawContactIds.isEmpty()) {
303            if (!contactIds.isEmpty()) {
304                sb.append(" OR ");
305            }
306            // Select all raw contacts that belong to the same contact as all raw contacts
307            // in rawContactIds. For every raw contact in rawContactIds that we are updating
308            // the index for, we need to rebuild the search index for all raw contacts belonging
309            // to the same contact, because we can only update the search index on a per-contact
310            // basis.
311            sb.append(RawContacts.CONTACT_ID + " IN " +
312                    "(SELECT " + RawContacts.CONTACT_ID + " FROM " + Tables.RAW_CONTACTS +
313                    " WHERE " + RawContactsColumns.CONCRETE_ID + " IN (");
314            sb.append(TextUtils.join(",", rawContactIds));
315            sb.append("))");
316        }
317
318        sb.append(")");
319
320        // The selection to select raw_contacts.
321        final String rawContactsSelection = sb.toString();
322
323        // Remove affected search_index rows.
324        final SQLiteDatabase db = mDbHelper.getWritableDatabase();
325        final int deleted = db.delete(Tables.SEARCH_INDEX,
326                SearchIndexColumns.CONTACT_ID + " IN (SELECT " +
327                    RawContacts.CONTACT_ID +
328                    " FROM " + Tables.RAW_CONTACTS +
329                    " WHERE " + rawContactsSelection +
330                    ")"
331                , null);
332
333        // Then rebuild index for them.
334        final int count = buildAndInsertIndex(db, rawContactsSelection);
335
336        if (VERBOSE_LOGGING) {
337            Log.v(TAG, "Updated search index for " + count + " contacts");
338        }
339    }
340
341    private int buildAndInsertIndex(SQLiteDatabase db, String selection) {
342        mSb.setLength(0);
343        mSb.append(Data.CONTACT_ID + ", ");
344        mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "=");
345        mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE));
346        mSb.append(" THEN -4 ");
347        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
348        mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE));
349        mSb.append(" THEN -3 ");
350        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
351        mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE));
352        mSb.append(" THEN -2");
353        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
354        mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE));
355        mSb.append(" THEN -1");
356        mSb.append(" ELSE " + DataColumns.MIMETYPE_ID);
357        mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID);
358
359        int count = 0;
360        Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS,
361                selection, null, null, null, mSb.toString());
362        mIndexBuilder.setCursor(cursor);
363        mIndexBuilder.reset();
364        try {
365            long currentContactId = -1;
366            while (cursor.moveToNext()) {
367                long contactId = cursor.getLong(0);
368                if (contactId != currentContactId) {
369                    if (currentContactId != -1) {
370                        insertIndexRow(db, currentContactId, mIndexBuilder);
371                        count++;
372                    }
373                    currentContactId = contactId;
374                    mIndexBuilder.reset();
375                }
376                String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE);
377                DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype);
378                if (dataRowHandler.hasSearchableData()) {
379                    dataRowHandler.appendSearchableData(mIndexBuilder);
380                    mIndexBuilder.commit();
381                }
382            }
383            if (currentContactId != -1) {
384                insertIndexRow(db, currentContactId, mIndexBuilder);
385                count++;
386            }
387        } finally {
388            cursor.close();
389        }
390        return count;
391    }
392
393    private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) {
394        mValues.clear();
395        mValues.put(SearchIndexColumns.CONTENT, builder.getContent());
396        mValues.put(SearchIndexColumns.NAME, builder.getName());
397        mValues.put(SearchIndexColumns.TOKENS, builder.getTokens());
398        mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
399        db.insert(Tables.SEARCH_INDEX, null, mValues);
400    }
401    private int getSearchIndexVersion() {
402        return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0"));
403    }
404
405    private void setSearchIndexVersion(int version) {
406        mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version));
407    }
408
    /**
     * Matches a single token-separator character, intended to mirror SQLite's "simple"
     * tokenizer. A character is part of a token when it is:
     * - a Unicode code point >= 128 (expressed here as the range U+0080 to U+FFFF), or
     * - an ASCII alphanumeric character or "_".
     * Every other character separates tokens.
     *
     * NOTE(review): the range ends at U+FFFF, so characters outside the Basic Multilingual
     * Plane presumably count as separators here - confirm against the tokenizer's behavior.
     */
    private static final Pattern FTS_TOKEN_SEPARATOR_RE =
            Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]");
417
418    /**
419     * Tokenize a string in the way as that of SQLite's "simple" tokenizer.
420     */
421    @VisibleForTesting
422    static List<String> splitIntoFtsTokens(String s) {
423        final ArrayList<String> ret = Lists.newArrayList();
424        for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) {
425            if (!TextUtils.isEmpty(token)) {
426                ret.add(token);
427            }
428        }
429        return ret;
430    }
431
432    /**
433     * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same
434     * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then
435     * returned as a String.
436     * @see FtsQueryBuilder#UNSCOPED_NORMALIZING
437     * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING
438     */
439    public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) {
440        final StringBuilder result = new StringBuilder();
441        for (String token : splitIntoFtsTokens(query)) {
442            ftsQueryBuilder.addToken(result, token);
443        }
444        return result.toString();
445    }
446
447    public static abstract class FtsQueryBuilder {
448        public abstract void addToken(StringBuilder builder, String token);
449
450        /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */
451        public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder();
452
453        /**
454         * Scopes each token to a column and normalizes the name.
455         * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*"
456         */
457        public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING =
458                new ScopedNameNormalizingBuilder();
459
460        /**
461         * Scopes each token to a the content column and also for name with normalization.
462         * Also adds a user-defined expression to each token. This allows common criteria to be
463         * concatenated to each token.
464         * Example (commonCriteria=" OR tokens:123*"):
465         * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*"
466         */
467        public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) {
468            return new FtsQueryBuilder() {
469                @Override
470                public void addToken(StringBuilder builder, String token) {
471                    if (builder.length() != 0) builder.append(' ');
472
473                    builder.append("content:");
474                    builder.append(token);
475                    builder.append("* ");
476
477                    final String normalizedToken = NameNormalizer.normalize(token);
478                    if (!TextUtils.isEmpty(normalizedToken)) {
479                        builder.append(" OR name:");
480                        builder.append(normalizedToken);
481                        builder.append('*');
482                    }
483
484                    builder.append(commonCriteria);
485                }
486            };
487        }
488    }
489
490    private static class UnscopedNormalizingBuilder extends FtsQueryBuilder {
491        @Override
492        public void addToken(StringBuilder builder, String token) {
493            if (builder.length() != 0) builder.append(' ');
494
495            // the token could be empty (if the search query was "_"). we should still emit it
496            // here, as we otherwise risk to end up with an empty MATCH-expression MATCH ""
497            builder.append(NameNormalizer.normalize(token));
498            builder.append('*');
499        }
500    }
501
502    private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder {
503        @Override
504        public void addToken(StringBuilder builder, String token) {
505            if (builder.length() != 0) builder.append(' ');
506
507            builder.append("content:");
508            builder.append(token);
509            builder.append('*');
510
511            final String normalizedToken = NameNormalizer.normalize(token);
512            if (!TextUtils.isEmpty(normalizedToken)) {
513                builder.append(" OR name:");
514                builder.append(normalizedToken);
515                builder.append('*');
516            }
517
518            builder.append(" OR tokens:");
519            builder.append(token);
520            builder.append("*");
521        }
522    }
523}
524