1f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov/*
2f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * Copyright (C) 2011 The Android Open Source Project
3f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov *
4f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * Licensed under the Apache License, Version 2.0 (the "License");
5f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * you may not use this file except in compliance with the License.
6f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * You may obtain a copy of the License at
7f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov *
8f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov *      http://www.apache.org/licenses/LICENSE-2.0
9f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov *
10f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * Unless required by applicable law or agreed to in writing, software
11f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * distributed under the License is distributed on an "AS IS" BASIS,
12f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * See the License for the specific language governing permissions and
14f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * limitations under the License
15f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov */
16f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovpackage com.android.providers.contacts;
17f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
18f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
19f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns;
207e086471c6317d059af21d292bee964b24613346Makoto Onukiimport com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
21f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns;
22f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport com.android.providers.contacts.ContactsDatabaseHelper.Tables;
23116d86ddd67330428f9128613b4886fc0ea66221Makoto Onukiimport com.google.android.collect.Lists;
24116d86ddd67330428f9128613b4886fc0ea66221Makoto Onukiimport com.google.common.annotations.VisibleForTesting;
25f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
26f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport android.content.ContentValues;
27f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport android.database.Cursor;
28f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport android.database.sqlite.SQLiteDatabase;
2905e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikovimport android.os.SystemClock;
30197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikovimport android.provider.ContactsContract.CommonDataKinds.Email;
31197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikovimport android.provider.ContactsContract.CommonDataKinds.Nickname;
32197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikovimport android.provider.ContactsContract.CommonDataKinds.Organization;
33197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikovimport android.provider.ContactsContract.CommonDataKinds.StructuredPostal;
34f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport android.provider.ContactsContract.Data;
3505e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikovimport android.provider.ContactsContract.ProviderStatus;
367e086471c6317d059af21d292bee964b24613346Makoto Onukiimport android.provider.ContactsContract.RawContacts;
37f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport android.text.TextUtils;
38f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport android.util.Log;
39f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
40116d86ddd67330428f9128613b4886fc0ea66221Makoto Onukiimport java.util.ArrayList;
41f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport java.util.HashSet;
42116d86ddd67330428f9128613b4886fc0ea66221Makoto Onukiimport java.util.List;
43f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovimport java.util.Set;
44f5f038faf7f3ef460e1c11028d467954840e5f6fMakoto Onukiimport java.util.regex.Pattern;
45f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
46f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov/**
47f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov * Maintains a search index for comprehensive contact search.
48f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov */
49f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikovpublic class SearchIndexManager {
// Tag passed to the android.util.Log calls made by this class.
50f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    private static final String TAG = "ContactsFTS";
51f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
// Evaluated once at class-initialization time; gates the Log.v() output below.
520992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki    private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE);
530992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki
// NOTE(review): presumably the property key under which the index version is persisted;
// its readers/writers are not visible in this part of the file -- confirm.
54197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov    public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index";
// Version that updateIndex() compares against getSearchIndexVersion(); a mismatch
// triggers a full rebuild of the index.
5505e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    private static final int SEARCH_INDEX_VERSION = 1;
5605e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov
57f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    private static final class ContactIndexQuery {
58f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public static final String[] COLUMNS = {
59f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                Data.CONTACT_ID,
60f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                MimetypesColumns.MIMETYPE,
61f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5,
62f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11,
63f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                Data.DATA12, Data.DATA13, Data.DATA14
64f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        };
65f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
66f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public static final int MIMETYPE = 1;
67f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    }
68f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
69f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    public static class IndexBuilder {
70f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public static final int SEPARATOR_SPACE = 0;
71f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public static final int SEPARATOR_PARENTHESES = 1;
72f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public static final int SEPARATOR_SLASH = 2;
73f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public static final int SEPARATOR_COMMA = 3;
74f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
75f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        private StringBuilder mSbContent = new StringBuilder();
76155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov        private StringBuilder mSbName = new StringBuilder();
77f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        private StringBuilder mSbTokens = new StringBuilder();
78f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        private StringBuilder mSbElementContent = new StringBuilder();
79f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        private HashSet<String> mUniqueElements = new HashSet<String>();
80f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        private Cursor mCursor;
81f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
82f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        void setCursor(Cursor cursor) {
83f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            this.mCursor = cursor;
84f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
85f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
86f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        void reset() {
87f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            mSbContent.setLength(0);
88f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            mSbTokens.setLength(0);
89155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov            mSbName.setLength(0);
90f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            mSbElementContent.setLength(0);
91f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            mUniqueElements.clear();
92f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
93f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
94f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public String getContent() {
95f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            return mSbContent.length() == 0 ? null : mSbContent.toString();
96f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
97f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
98155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov        public String getName() {
99155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov            return mSbName.length() == 0 ? null : mSbName.toString();
100155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov        }
101155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov
102f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public String getTokens() {
103f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            return mSbTokens.length() == 0 ? null : mSbTokens.toString();
104f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
105f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
106eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov        public String getString(String columnName) {
107eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov            return mCursor.getString(mCursor.getColumnIndex(columnName));
108eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov        }
109eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov
1106d9702cec82fd27a1c3093c64df9dcc22744899aDmitri Plotnikov        public int getInt(String columnName) {
1116d9702cec82fd27a1c3093c64df9dcc22744899aDmitri Plotnikov            return mCursor.getInt(mCursor.getColumnIndex(columnName));
1126d9702cec82fd27a1c3093c64df9dcc22744899aDmitri Plotnikov        }
1136d9702cec82fd27a1c3093c64df9dcc22744899aDmitri Plotnikov
114f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        @Override
115f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public String toString() {
116155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov            return "Content: " + mSbContent + "\n Name: " + mSbTokens + "\n Tokens: " + mSbTokens;
117f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
118f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
119f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public void commit() {
120f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            if (mSbElementContent.length() != 0) {
121eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                String content = mSbElementContent.toString().replace('\n', ' ');
122f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                if (!mUniqueElements.contains(content)) {
123f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    if (mSbContent.length() != 0) {
124f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                        mSbContent.append('\n');
125f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    }
126f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    mSbContent.append(content);
127f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    mUniqueElements.add(content);
128f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                }
129eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                mSbElementContent.setLength(0);
130f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            }
131f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
132f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
133f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public void appendContentFromColumn(String columnName) {
134f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            appendContentFromColumn(columnName, SEPARATOR_SPACE);
135f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
136f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
137f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        public void appendContentFromColumn(String columnName, int format) {
138eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov            appendContent(getString(columnName), format);
139eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov        }
140eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov
141eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov        public void appendContent(String value) {
142eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov            appendContent(value, SEPARATOR_SPACE);
143f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
144f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
145116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki        private void appendContent(String value, int format) {
146f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            if (TextUtils.isEmpty(value)) {
147f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                return;
148f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            }
149f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
150f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            switch (format) {
151f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                case SEPARATOR_SPACE:
152eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                    if (mSbElementContent.length() > 0) {
153eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                        mSbElementContent.append(' ');
154f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    }
155eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                    mSbElementContent.append(value);
156f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    break;
157f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
158f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                case SEPARATOR_SLASH:
159eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                    mSbElementContent.append('/').append(value);
160f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    break;
161f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
162f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                case SEPARATOR_PARENTHESES:
163eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                    if (mSbElementContent.length() > 0) {
164eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                        mSbElementContent.append(' ');
165f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    }
166eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                    mSbElementContent.append('(').append(value).append(')');
167f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    break;
168f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
169f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                case SEPARATOR_COMMA:
170eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                    if (mSbElementContent.length() > 0) {
171eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                        mSbElementContent.append(", ");
172f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    }
173eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                    mSbElementContent.append(value);
174f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    break;
175f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            }
176f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
177eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov
178eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov        public void appendToken(String token) {
179f482a4e25759d2c144eb41a3de56a342bd473435Dmitri Plotnikov            if (TextUtils.isEmpty(token)) {
180f482a4e25759d2c144eb41a3de56a342bd473435Dmitri Plotnikov                return;
181f482a4e25759d2c144eb41a3de56a342bd473435Dmitri Plotnikov            }
182f482a4e25759d2c144eb41a3de56a342bd473435Dmitri Plotnikov
183eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov            if (mSbTokens.length() != 0) {
184eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov                mSbTokens.append(' ');
185eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov            }
186eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov            mSbTokens.append(token);
187eeeed5669d98897501bf2b18c88579c7effd0955Dmitri Plotnikov        }
188155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov
189155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov        public void appendName(String name) {
190155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov            if (TextUtils.isEmpty(name)) {
191155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov                return;
192155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov            }
193116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // First, put the original name.
194116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            appendNameInternal(name);
195116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki
196116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // Then, if the name contains more than one FTS token, put each token into the index
197116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // too.
198116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            //
199116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // This is to make names with special characters searchable, such as "double-barrelled"
200116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // "L'Image".
201116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            //
202116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // Here's how it works:
203116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // Because we "normalize" names when putting into the index, if we only put
204116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // "double-barrelled", the index will only contain "doublebarrelled".
205116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // Now, if the user searches for "double-barrelled", the searcher tokenizes it into
206116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // two tokens, "double" and "barrelled".  The first one matches "doublebarrelled"
207116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // but the second one doesn't (because we only do the prefix match), so
208116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // "doublebarrelled" doesn't match.
209116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // So, here, we put each token in a name into the index too.  In the case above,
210116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // we put also "double" and "barrelled".
211116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled"
212116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            // will all match "double-barrelled".
213116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            final List<String> nameParts = splitIntoFtsTokens(name);
214116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            if (nameParts.size() > 1) {
215116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki                for (String namePart : nameParts) {
216f5f038faf7f3ef460e1c11028d467954840e5f6fMakoto Onuki                    if (!TextUtils.isEmpty(namePart)) {
217f5f038faf7f3ef460e1c11028d467954840e5f6fMakoto Onuki                        appendNameInternal(namePart);
218f5f038faf7f3ef460e1c11028d467954840e5f6fMakoto Onuki                    }
219f5f038faf7f3ef460e1c11028d467954840e5f6fMakoto Onuki                }
220f5f038faf7f3ef460e1c11028d467954840e5f6fMakoto Onuki            }
221f5f038faf7f3ef460e1c11028d467954840e5f6fMakoto Onuki        }
222155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov
223116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki        /**
224116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki         * Normalize a name and add to {@link #mSbName}
225116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki         */
226f5f038faf7f3ef460e1c11028d467954840e5f6fMakoto Onuki        private void appendNameInternal(String name) {
227155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov            if (mSbName.length() != 0) {
228155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov                mSbName.append(' ');
229155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov            }
230d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            mSbName.append(NameNormalizer.normalize(name));
231155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov        }
232f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    }
233f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
// Provider that owns this manager; rebuildIndex() reports upgrade status through it.
234f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    private final ContactsProvider2 mContactsProvider;
// Database helper obtained from the provider in the constructor.
235f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    private final ContactsDatabaseHelper mDbHelper;
// Scratch buffer; buildAndInsertIndex() clears it and uses it to assemble the ORDER BY clause.
236f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    private StringBuilder mSb = new StringBuilder();
// Shared builder; buildAndInsertIndex() gives it the query cursor and resets it.
237f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    private IndexBuilder mIndexBuilder = new IndexBuilder();
// NOTE(review): presumably reused as scratch storage for index-row inserts; usage is not
// visible in this part of the file -- confirm.
238f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    private ContentValues mValues = new ContentValues();
// NOTE(review): presumably a reusable single-element selection-args array; usage is not
// visible in this part of the file -- confirm.
239f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    private String[] mSelectionArgs1 = new String[1];
240f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
/**
 * Creates a manager bound to the given provider and to that provider's database helper.
 *
 * @param contactsProvider provider whose database holds the search index
 */
public SearchIndexManager(ContactsProvider2 contactsProvider) {
    mContactsProvider = contactsProvider;
    mDbHelper = (ContactsDatabaseHelper) contactsProvider.getDatabaseHelper();
}
245f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
24605e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    public void updateIndex() {
24705e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) {
24805e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov            return;
24905e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        }
25005e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        SQLiteDatabase db = mDbHelper.getWritableDatabase();
25105e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        db.beginTransaction();
25205e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        try {
25305e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov            if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) {
25405e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov                rebuildIndex(db);
25505e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov                setSearchIndexVersion(SEARCH_INDEX_VERSION);
25605e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov                db.setTransactionSuccessful();
25705e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov            }
25805e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        } finally {
25905e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov            db.endTransaction();
26005e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        }
26105e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    }
26205e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov
26305e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    private void rebuildIndex(SQLiteDatabase db) {
26405e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        mContactsProvider.setProviderStatus(ProviderStatus.STATUS_UPGRADING);
265565b62f354d8b6aadc760092a7dbf483f8bbbe17Makoto Onuki        final long start = SystemClock.elapsedRealtime();
26605e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        int count = 0;
26705e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        try {
26805e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov            mDbHelper.createSearchIndexTable(db);
2697e086471c6317d059af21d292bee964b24613346Makoto Onuki            count = buildAndInsertIndex(db, null);
27005e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        } finally {
27105e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov            mContactsProvider.setProviderStatus(ProviderStatus.STATUS_NORMAL);
27205e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov
273565b62f354d8b6aadc760092a7dbf483f8bbbe17Makoto Onuki            final long end = SystemClock.elapsedRealtime();
27405e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov            Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, "
27505e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov                    + count + " contacts");
27605e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        }
27705e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    }
27805e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov
279bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov    public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) {
2800992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki        if (VERBOSE_LOGGING) {
2810992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki            Log.v(TAG, "Updating search index for " + contactIds.size() +
2820992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki                    " contacts / " + rawContactIds.size() + " raw contacts");
2830992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki        }
2840992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki        StringBuilder sb = new StringBuilder();
2850992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki        sb.append("(");
286bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov        if (!contactIds.isEmpty()) {
2870992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki            sb.append(RawContacts.CONTACT_ID + " IN (");
288bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov            for (Long contactId : contactIds) {
2890992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki                sb.append(contactId).append(",");
290bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov            }
2910992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki            sb.setLength(sb.length() - 1);
2920992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki            sb.append(')');
293bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov        }
294bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov
295bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov        if (!rawContactIds.isEmpty()) {
296bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov            if (!contactIds.isEmpty()) {
2970992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki                sb.append(" OR ");
298bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov            }
2990992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki            sb.append(RawContactsColumns.CONCRETE_ID + " IN (");
300bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov            for (Long rawContactId : rawContactIds) {
3010992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki                sb.append(rawContactId).append(",");
302bd9abbb6b03b4ec1e28ad3fa2fcba5d1eb8609eaDmitri Plotnikov            }
3030992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki            sb.setLength(sb.length() - 1);
3040992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki            sb.append(')');
305f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
306f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
3070992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki        sb.append(")");
3087e086471c6317d059af21d292bee964b24613346Makoto Onuki
3097e086471c6317d059af21d292bee964b24613346Makoto Onuki        // The selection to select raw_contacts.
3100992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki        final String rawContactsSelection = sb.toString();
3117e086471c6317d059af21d292bee964b24613346Makoto Onuki
3127e086471c6317d059af21d292bee964b24613346Makoto Onuki        // Remove affected search_index rows.
3137e086471c6317d059af21d292bee964b24613346Makoto Onuki        final SQLiteDatabase db = mDbHelper.getWritableDatabase();
3147e086471c6317d059af21d292bee964b24613346Makoto Onuki        final int deleted = db.delete(Tables.SEARCH_INDEX,
3157e086471c6317d059af21d292bee964b24613346Makoto Onuki                SearchIndexColumns.CONTACT_ID + " IN (SELECT " +
3167e086471c6317d059af21d292bee964b24613346Makoto Onuki                    RawContacts.CONTACT_ID +
3177e086471c6317d059af21d292bee964b24613346Makoto Onuki                    " FROM " + Tables.RAW_CONTACTS +
3187e086471c6317d059af21d292bee964b24613346Makoto Onuki                    " WHERE " + rawContactsSelection +
3197e086471c6317d059af21d292bee964b24613346Makoto Onuki                    ")"
3207e086471c6317d059af21d292bee964b24613346Makoto Onuki                , null);
3217e086471c6317d059af21d292bee964b24613346Makoto Onuki
3227e086471c6317d059af21d292bee964b24613346Makoto Onuki        // Then rebuild index for them.
3230992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki        final int count = buildAndInsertIndex(db, rawContactsSelection);
3240992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki        if (VERBOSE_LOGGING) {
3250992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki            Log.v(TAG, "Updated search index for " + count + " contacts");
3260992b9d4969ed0eee6e879db94292b635229e2b7Makoto Onuki        }
32705e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    }
32805e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov
3297e086471c6317d059af21d292bee964b24613346Makoto Onuki    private int buildAndInsertIndex(SQLiteDatabase db, String selection) {
330197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.setLength(0);
331197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(Data.CONTACT_ID + ", ");
332197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "=");
333197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE));
334197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(" THEN -4 ");
335197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
336197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE));
337197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(" THEN -3 ");
338197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
339197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE));
340197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(" THEN -2");
341197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "=");
342197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE));
343197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(" THEN -1");
344174f7d319b987aa2aeeb6f2563f4b939acb8d791Dmitri Plotnikov        mSb.append(" ELSE " + DataColumns.MIMETYPE_ID);
345197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID);
346197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov
34705e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        int count = 0;
348197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov        Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS,
349197411a6cc3f81b94a34ca207f267d43d8548f04Dmitri Plotnikov                selection, null, null, null, mSb.toString());
350f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        mIndexBuilder.setCursor(cursor);
351f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        mIndexBuilder.reset();
352f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        try {
353f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            long currentContactId = -1;
354f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            while (cursor.moveToNext()) {
355f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                long contactId = cursor.getLong(0);
356f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                if (contactId != currentContactId) {
357f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    if (currentContactId != -1) {
3587e086471c6317d059af21d292bee964b24613346Makoto Onuki                        insertIndexRow(db, currentContactId, mIndexBuilder);
35905e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov                        count++;
360f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    }
361f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    currentContactId = contactId;
362f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    mIndexBuilder.reset();
363f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                }
364f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE);
365f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype);
366f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                if (dataRowHandler.hasSearchableData()) {
367f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    dataRowHandler.appendSearchableData(mIndexBuilder);
368f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                    mIndexBuilder.commit();
369f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov                }
370f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            }
371f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            if (currentContactId != -1) {
3727e086471c6317d059af21d292bee964b24613346Makoto Onuki                insertIndexRow(db, currentContactId, mIndexBuilder);
37305e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov                count++;
374f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            }
375f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        } finally {
376f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov            cursor.close();
377f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        }
37805e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        return count;
379f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    }
380f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov
3817e086471c6317d059af21d292bee964b24613346Makoto Onuki    private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) {
382f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        mValues.clear();
383f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        mValues.put(SearchIndexColumns.CONTENT, builder.getContent());
384155accbcb95fc13b984cf0ea8e5498a9c619cbf5Dmitri Plotnikov        mValues.put(SearchIndexColumns.NAME, builder.getName());
385f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov        mValues.put(SearchIndexColumns.TOKENS, builder.getTokens());
3867e086471c6317d059af21d292bee964b24613346Makoto Onuki        mValues.put(SearchIndexColumns.CONTACT_ID, contactId);
3877e086471c6317d059af21d292bee964b24613346Makoto Onuki        db.insert(Tables.SEARCH_INDEX, null, mValues);
388f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov    }
38905e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    private int getSearchIndexVersion() {
39005e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0"));
39105e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    }
39205e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov
39305e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    private void setSearchIndexVersion(int version) {
39405e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov        mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version));
39505e50fbf9809bf04eceec3d2a2753630dc4f9315Dmitri Plotnikov    }
396d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
397d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann    /**
398116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki     * Token separator that matches SQLite's "simple" tokenizer.
399116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki     * - Unicode codepoints >= 128: Everything
400116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki     * - Unicode codepoints < 128: Alphanumeric and "_"
401116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki     * - Everything else is a separator of tokens
402116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki     */
403116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki    private static final Pattern FTS_TOKEN_SEPARATOR_RE =
404116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]");
405116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki
406116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki    /**
407116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki     * Tokenize a string in the way as that of SQLite's "simple" tokenizer.
408116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki     */
409116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki    @VisibleForTesting
410116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki    static List<String> splitIntoFtsTokens(String s) {
411116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki        final ArrayList<String> ret = Lists.newArrayList();
412116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki        for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) {
413116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            if (!TextUtils.isEmpty(token)) {
414116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki                ret.add(token);
415116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            }
416116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki        }
417116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki        return ret;
418116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki    }
419116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki
420116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki    /**
421d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann     * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same
422d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann     * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then
423d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann     * returned as a String.
424d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann     * @see FtsQueryBuilder#UNSCOPED_NORMALIZING
425d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann     * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING
426d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann     */
427d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann    public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) {
428d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        final StringBuilder result = new StringBuilder();
429116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki        for (String token : splitIntoFtsTokens(query)) {
430116d86ddd67330428f9128613b4886fc0ea66221Makoto Onuki            ftsQueryBuilder.addToken(result, token);
431d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        }
432d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        return result.toString();
433d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann    }
434d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
435d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann    public static abstract class FtsQueryBuilder {
436d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        public abstract void addToken(StringBuilder builder, String token);
437d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
438d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */
439d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder();
440d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
441d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        /**
442d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann         * Scopes each token to a column and normalizes the name.
443d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann         * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*"
444d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann         */
445d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING =
446d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                new ScopedNameNormalizingBuilder();
447d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
448d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        /**
449d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann         * Scopes each token to a the content column and also for name with normalization.
450d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann         * Also adds a user-defined expression to each token. This allows common criteria to be
451d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann         * concatenated to each token.
452d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann         * Example (commonCriteria=" OR tokens:123*"):
453d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann         * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*"
454d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann         */
455d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) {
456d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            return new FtsQueryBuilder() {
457d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                @Override
458d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                public void addToken(StringBuilder builder, String token) {
459d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                    if (builder.length() != 0) builder.append(' ');
460d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
461d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                    builder.append("content:");
462d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                    builder.append(token);
463d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                    builder.append("* ");
464d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
465d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                    final String normalizedToken = NameNormalizer.normalize(token);
466d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                    if (!TextUtils.isEmpty(normalizedToken)) {
467d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                        builder.append(" OR name:");
468d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                        builder.append(normalizedToken);
469d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                        builder.append('*');
470d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                    }
471d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
472d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                    builder.append(commonCriteria);
473d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                }
474d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            };
475d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        }
476d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann    }
477d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
478d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann    private static class UnscopedNormalizingBuilder extends FtsQueryBuilder {
479d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        @Override
480d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        public void addToken(StringBuilder builder, String token) {
481d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            if (builder.length() != 0) builder.append(' ');
482d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
483d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            // the token could be empty (if the search query was "_"). we should still emit it
484d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            // here, as we otherwise risk to end up with an empty MATCH-expression MATCH ""
485d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            builder.append(NameNormalizer.normalize(token));
486d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            builder.append('*');
487d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        }
488d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann    }
489d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
490d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann    private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder {
491d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        @Override
492d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        public void addToken(StringBuilder builder, String token) {
493d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            if (builder.length() != 0) builder.append(' ');
494d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
495d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            builder.append("content:");
496d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            builder.append(token);
497d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            builder.append('*');
498d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
499d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            final String normalizedToken = NameNormalizer.normalize(token);
500d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            if (!TextUtils.isEmpty(normalizedToken)) {
501d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                builder.append(" OR name:");
502d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                builder.append(normalizedToken);
503d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann                builder.append('*');
504d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            }
505d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann
506d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            builder.append(" OR tokens:");
507d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            builder.append(token);
508d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann            builder.append("*");
509d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann        }
510d1746e09bc7739f3d1449cececc66d5045ada498Daniel Lehmann    }
511f262d56495ac4ea30d31bd050efb116bd4bb4235Dmitri Plotnikov}
512