1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16package com.android.providers.contacts;
17
18import android.content.ContentValues;
19import android.database.Cursor;
20import android.database.sqlite.SQLiteDatabase;
21import android.os.SystemClock;
22import android.provider.ContactsContract.CommonDataKinds.Email;
23import android.provider.ContactsContract.CommonDataKinds.Nickname;
24import android.provider.ContactsContract.CommonDataKinds.Organization;
25import android.provider.ContactsContract.CommonDataKinds.StructuredPostal;
26import android.provider.ContactsContract.Data;
27import android.provider.ContactsContract.ProviderStatus;
28import android.provider.ContactsContract.RawContacts;
29import android.text.TextUtils;
30import android.util.Log;
31
32import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
33import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns;
34import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
35import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns;
36import com.android.providers.contacts.ContactsDatabaseHelper.Tables;
37import com.google.android.collect.Lists;
38import com.google.common.annotations.VisibleForTesting;
39
40import java.util.ArrayList;
41import java.util.HashSet;
42import java.util.List;
43import java.util.Set;
44import java.util.regex.Pattern;
45
46/**
47 * Maintains a search index for comprehensive contact search.
48 */
49public class SearchIndexManager {
50    private static final String TAG = "ContactsFTS";
51
52    private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE);
53
54    public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index";
55    private static final int SEARCH_INDEX_VERSION = 1;
56
57    private static final class ContactIndexQuery {
58        public static final String[] COLUMNS = {
59                Data.CONTACT_ID,
60                MimetypesColumns.MIMETYPE,
61                Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5,
62                Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11,
63                Data.DATA12, Data.DATA13, Data.DATA14
64        };
65
66        public static final int MIMETYPE = 1;
67    }
68
69    public static class IndexBuilder {
70        public static final int SEPARATOR_SPACE = 0;
71        public static final int SEPARATOR_PARENTHESES = 1;
72        public static final int SEPARATOR_SLASH = 2;
73        public static final int SEPARATOR_COMMA = 3;
74
75        private StringBuilder mSbContent = new StringBuilder();
76        private StringBuilder mSbName = new StringBuilder();
77        private StringBuilder mSbTokens = new StringBuilder();
78        private StringBuilder mSbElementContent = new StringBuilder();
79        private HashSet<String> mUniqueElements = new HashSet<String>();
80        private Cursor mCursor;
81
82        void setCursor(Cursor cursor) {
83            this.mCursor = cursor;
84        }
85
86        void reset() {
87            mSbContent.setLength(0);
88            mSbTokens.setLength(0);
89            mSbName.setLength(0);
90            mSbElementContent.setLength(0);
91            mUniqueElements.clear();
92        }
93
94        public String getContent() {
95            return mSbContent.length() == 0 ? null : mSbContent.toString();
96        }
97
98        public String getName() {
99            return mSbName.length() == 0 ? null : mSbName.toString();
100        }
101
102        public String getTokens() {
103            return mSbTokens.length() == 0 ? null : mSbTokens.toString();
104        }
105
106        public String getString(String columnName) {
107            return mCursor.getString(mCursor.getColumnIndex(columnName));
108        }
109
110        public int getInt(String columnName) {
111            return mCursor.getInt(mCursor.getColumnIndex(columnName));
112        }
113
114        @Override
115        public String toString() {
116            return "Content: " + mSbContent + "\n Name: " + mSbTokens + "\n Tokens: " + mSbTokens;
117        }
118
119        public void commit() {
120            if (mSbElementContent.length() != 0) {
121                String content = mSbElementContent.toString().replace('\n', ' ');
122                if (!mUniqueElements.contains(content)) {
123                    if (mSbContent.length() != 0) {
124                        mSbContent.append('\n');
125                    }
126                    mSbContent.append(content);
127                    mUniqueElements.add(content);
128                }
129                mSbElementContent.setLength(0);
130            }
131        }
132
133        public void appendContentFromColumn(String columnName) {
134            appendContentFromColumn(columnName, SEPARATOR_SPACE);
135        }
136
137        public void appendContentFromColumn(String columnName, int format) {
138            appendContent(getString(columnName), format);
139        }
140
141        public void appendContent(String value) {
142            appendContent(value, SEPARATOR_SPACE);
143        }
144
145        private void appendContent(String value, int format) {
146            if (TextUtils.isEmpty(value)) {
147                return;
148            }
149
150            switch (format) {
151                case SEPARATOR_SPACE:
152                    if (mSbElementContent.length() > 0) {
153                        mSbElementContent.append(' ');
154                    }
155                    mSbElementContent.append(value);
156                    break;
157
158                case SEPARATOR_SLASH:
159                    mSbElementContent.append('/').append(value);
160                    break;
161
162                case SEPARATOR_PARENTHESES:
163                    if (mSbElementContent.length() > 0) {
164                        mSbElementContent.append(' ');
165                    }
166                    mSbElementContent.append('(').append(value).append(')');
167                    break;
168
169                case SEPARATOR_COMMA:
170                    if (mSbElementContent.length() > 0) {
171                        mSbElementContent.append(", ");
172                    }
173                    mSbElementContent.append(value);
174                    break;
175            }
176        }
177
178        public void appendToken(String token) {
179            if (TextUtils.isEmpty(token)) {
180                return;
181            }
182
183            if (mSbTokens.length() != 0) {
184                mSbTokens.append(' ');
185            }
186            mSbTokens.append(token);
187        }
188
189        public void appendNameFromColumn(String columnName) {
190            appendName(getString(columnName));
191        }
192
193        public void appendName(String name) {
194            if (TextUtils.isEmpty(name)) {
195                return;
196            }
197            // First, put the original name.
198            appendNameInternal(name);
199
200            // Then, if the name contains more than one FTS token, put each token into the index
201            // too.
202            //
203            // This is to make names with special characters searchable, such as "double-barrelled"
204            // "L'Image".
205            //
206            // Here's how it works:
207            // Because we "normalize" names when putting into the index, if we only put
208            // "double-barrelled", the index will only contain "doublebarrelled".
209            // Now, if the user searches for "double-barrelled", the searcher tokenizes it into
210            // two tokens, "double" and "barrelled".  The first one matches "doublebarrelled"
211            // but the second one doesn't (because we only do the prefix match), so
212            // "doublebarrelled" doesn't match.
213            // So, here, we put each token in a name into the index too.  In the case above,
214            // we put also "double" and "barrelled".
215            // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled"
216            // will all match "double-barrelled".
217            final List<String> nameParts = splitIntoFtsTokens(name);
218            if (nameParts.size() > 1) {
219                for (String namePart : nameParts) {
220                    if (!TextUtils.isEmpty(namePart)) {
221                        appendNameInternal(namePart);
222                    }
223                }
224            }
225        }
226
227        /**
228         * Normalize a name and add to {@link #mSbName}
229         */
230        private void appendNameInternal(String name) {
231            if (mSbName.length() != 0) {
232                mSbName.append(' ');
233            }
234            mSbName.append(NameNormalizer.normalize(name));
235        }
236    }
237
238    private final ContactsProvider2 mContactsProvider;
239    private final ContactsDatabaseHelper mDbHelper;
240    private StringBuilder mSb = new StringBuilder();
241    private IndexBuilder mIndexBuilder = new IndexBuilder();
242    private ContentValues mValues = new ContentValues();
243    private String[] mSelectionArgs1 = new String[1];
244
245    public SearchIndexManager(ContactsProvider2 contactsProvider) {
246        this.mContactsProvider = contactsProvider;
247        mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper();
248    }
249
250    public void updateIndex(boolean force) {
251        if (force) {
252            setSearchIndexVersion(0);
253        } else {
254            if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) {
255                return;
256            }
257        }
258        SQLiteDatabase db = mDbHelper.getWritableDatabase();
259        db.beginTransaction();
260        try {
261            // We do a version check again, because the version might have been modified after
262            // the first check.  We need to do the check again in a transaction to make sure.
263            if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) {
264                rebuildIndex(db);
265                setSearchIndexVersion(SEARCH_INDEX_VERSION);
266                db.setTransactionSuccessful();
267            }
268        } finally {
269            db.endTransaction();
270        }
271    }
272
273    private void rebuildIndex(SQLiteDatabase db) {
274        mContactsProvider.setProviderStatus(ProviderStatus.STATUS_UPGRADING);
275        final long start = SystemClock.elapsedRealtime();
276        int count = 0;
277        try {
278            mDbHelper.createSearchIndexTable(db, true);
279            count = buildAndInsertIndex(db, null);
280        } finally {
281            mContactsProvider.setProviderStatus(ProviderStatus.STATUS_NORMAL);
282
283            final long end = SystemClock.elapsedRealtime();
284            Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, "
285                    + count + " contacts");
286        }
287    }
288
289    public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) {
290        if (VERBOSE_LOGGING) {
291            Log.v(TAG, "Updating search index for " + contactIds.size() +
292                    " contacts / " + rawContactIds.size() + " raw contacts");
293        }
294        StringBuilder sb = new StringBuilder();
295        sb.append("(");
296        if (!contactIds.isEmpty()) {
297            sb.append(RawContacts.CONTACT_ID + " IN (");
298            for (Long contactId : contactIds) {
299                sb.append(contactId).append(",");
300            }
301            sb.setLength(sb.length() - 1);
302            sb.append(')');
303        }
304
305        if (!rawContactIds.isEmpty()) {
306            if (!contactIds.isEmpty()) {
307                sb.append(" OR ");
308            }
309            sb.append(RawContactsColumns.CONCRETE_ID + " IN (");
310            for (Long rawContactId : rawContactIds) {
311                sb.append(rawContactId).append(",");
312            }
313            sb.setLength(sb.length() - 1);
314            sb.append(')');
315        }
316
317        sb.append(")");
318
319        // The selection to select raw_contacts.
320        final String rawContactsSelection = sb.toString();
321
322        // Remove affected search_index rows.
323        final SQLiteDatabase db = mDbHelper.getWritableDatabase();
324        final int deleted = db.delete(Tables.SEARCH_INDEX,
325                SearchIndexColumns.CONTACT_ID + " IN (SELECT " +
326                    RawContacts.CONTACT_ID +
327                    " FROM " + Tables.RAW_CONTACTS +
328                    " WHERE " + rawContactsSelection +
329                    ")"
330                , null);
331
332        // Then rebuild index for them.
333        final int count = buildAndInsertIndex(db, rawContactsSelection);
334        if (VERBOSE_LOGGING) {
335            Log.v(TAG, "Updated search index for " + count + " contacts");
336        }
337    }
338
339    private int buildAndInsertIndex(SQLiteDatabase db, String selection) {
340        mSb.setLength(0);
341        mSb.append(Data.CONTACT_ID + ", ");
342        mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "=");
343        mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE));
344        mSb.append(" THEN -4 ");
345        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
346        mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE));
347        mSb.append(" THEN -3 ");
348        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
349        mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE));
350        mSb.append(" THEN -2");
351        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
352        mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE));
353        mSb.append(" THEN -1");
354        mSb.append(" ELSE " + DataColumns.MIMETYPE_ID);
355        mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID);
356
357        int count = 0;
358        Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS,
359                selection, null, null, null, mSb.toString());
360        mIndexBuilder.setCursor(cursor);
361        mIndexBuilder.reset();
362        try {
363            long currentContactId = -1;
364            while (cursor.moveToNext()) {
365                long contactId = cursor.getLong(0);
366                if (contactId != currentContactId) {
367                    if (currentContactId != -1) {
368                        insertIndexRow(db, currentContactId, mIndexBuilder);
369                        count++;
370                    }
371                    currentContactId = contactId;
372                    mIndexBuilder.reset();
373                }
374                String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE);
375                DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype);
376                if (dataRowHandler.hasSearchableData()) {
377                    dataRowHandler.appendSearchableData(mIndexBuilder);
378                    mIndexBuilder.commit();
379                }
380            }
381            if (currentContactId != -1) {
382                insertIndexRow(db, currentContactId, mIndexBuilder);
383                count++;
384            }
385        } finally {
386            cursor.close();
387        }
388        return count;
389    }
390
391    private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) {
392        mValues.clear();
393        mValues.put(SearchIndexColumns.CONTENT, builder.getContent());
394        mValues.put(SearchIndexColumns.NAME, builder.getName());
395        mValues.put(SearchIndexColumns.TOKENS, builder.getTokens());
396        mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
397        db.insert(Tables.SEARCH_INDEX, null, mValues);
398    }
399    private int getSearchIndexVersion() {
400        return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0"));
401    }
402
403    private void setSearchIndexVersion(int version) {
404        mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version));
405    }
406
407    /**
408     * Token separator that matches SQLite's "simple" tokenizer.
409     * - Unicode codepoints >= 128: Everything
410     * - Unicode codepoints < 128: Alphanumeric and "_"
411     * - Everything else is a separator of tokens
412     */
413    private static final Pattern FTS_TOKEN_SEPARATOR_RE =
414            Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]");
415
416    /**
417     * Tokenize a string in the way as that of SQLite's "simple" tokenizer.
418     */
419    @VisibleForTesting
420    static List<String> splitIntoFtsTokens(String s) {
421        final ArrayList<String> ret = Lists.newArrayList();
422        for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) {
423            if (!TextUtils.isEmpty(token)) {
424                ret.add(token);
425            }
426        }
427        return ret;
428    }
429
430    /**
431     * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same
432     * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then
433     * returned as a String.
434     * @see FtsQueryBuilder#UNSCOPED_NORMALIZING
435     * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING
436     */
437    public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) {
438        final StringBuilder result = new StringBuilder();
439        for (String token : splitIntoFtsTokens(query)) {
440            ftsQueryBuilder.addToken(result, token);
441        }
442        return result.toString();
443    }
444
445    public static abstract class FtsQueryBuilder {
446        public abstract void addToken(StringBuilder builder, String token);
447
448        /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */
449        public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder();
450
451        /**
452         * Scopes each token to a column and normalizes the name.
453         * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*"
454         */
455        public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING =
456                new ScopedNameNormalizingBuilder();
457
458        /**
459         * Scopes each token to a the content column and also for name with normalization.
460         * Also adds a user-defined expression to each token. This allows common criteria to be
461         * concatenated to each token.
462         * Example (commonCriteria=" OR tokens:123*"):
463         * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*"
464         */
465        public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) {
466            return new FtsQueryBuilder() {
467                @Override
468                public void addToken(StringBuilder builder, String token) {
469                    if (builder.length() != 0) builder.append(' ');
470
471                    builder.append("content:");
472                    builder.append(token);
473                    builder.append("* ");
474
475                    final String normalizedToken = NameNormalizer.normalize(token);
476                    if (!TextUtils.isEmpty(normalizedToken)) {
477                        builder.append(" OR name:");
478                        builder.append(normalizedToken);
479                        builder.append('*');
480                    }
481
482                    builder.append(commonCriteria);
483                }
484            };
485        }
486    }
487
488    private static class UnscopedNormalizingBuilder extends FtsQueryBuilder {
489        @Override
490        public void addToken(StringBuilder builder, String token) {
491            if (builder.length() != 0) builder.append(' ');
492
493            // the token could be empty (if the search query was "_"). we should still emit it
494            // here, as we otherwise risk to end up with an empty MATCH-expression MATCH ""
495            builder.append(NameNormalizer.normalize(token));
496            builder.append('*');
497        }
498    }
499
500    private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder {
501        @Override
502        public void addToken(StringBuilder builder, String token) {
503            if (builder.length() != 0) builder.append(' ');
504
505            builder.append("content:");
506            builder.append(token);
507            builder.append('*');
508
509            final String normalizedToken = NameNormalizer.normalize(token);
510            if (!TextUtils.isEmpty(normalizedToken)) {
511                builder.append(" OR name:");
512                builder.append(normalizedToken);
513                builder.append('*');
514            }
515
516            builder.append(" OR tokens:");
517            builder.append(token);
518            builder.append("*");
519        }
520    }
521}
522