1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16
17package com.android.providers.contacts.aggregation;
18
19import android.database.Cursor;
20import android.database.DatabaseUtils;
21import android.database.sqlite.SQLiteDatabase;
22import android.provider.ContactsContract.AggregationExceptions;
23import android.provider.ContactsContract.CommonDataKinds.Identity;
24import android.provider.ContactsContract.Contacts.AggregationSuggestions;
25import android.provider.ContactsContract.Data;
26import android.provider.ContactsContract.RawContacts;
27import android.text.TextUtils;
28import android.util.Log;
29import com.android.providers.contacts.ContactsDatabaseHelper;
30import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns;
31import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupColumns;
32import com.android.providers.contacts.ContactsDatabaseHelper.NameLookupType;
33import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns;
34import com.android.providers.contacts.ContactsDatabaseHelper.Tables;
35import com.android.providers.contacts.ContactsProvider2;
36import com.android.providers.contacts.NameSplitter;
37import com.android.providers.contacts.PhotoPriorityResolver;
38import com.android.providers.contacts.TransactionContext;
39import com.android.providers.contacts.aggregation.util.CommonNicknameCache;
40import com.android.providers.contacts.aggregation.util.ContactMatcher;
41import com.android.providers.contacts.aggregation.util.MatchScore;
42import com.android.providers.contacts.database.ContactsTableUtil;
43import com.google.android.collect.Sets;
44
45import java.util.ArrayList;
46import java.util.HashSet;
47import java.util.List;
48import java.util.Set;
49
50/**
51 * ContactAggregator deals with aggregating contact information coming from different sources.
52 * Two John Doe contacts from two disjoint sources are presumed to be the same
53 * person unless the user declares otherwise.
54 */
55public class ContactAggregator extends AbstractContactAggregator {
56
57    // Return code for the canJoinIntoContact method.
58    private static final int JOIN = 1;
59    private static final int KEEP_SEPARATE = 0;
60    private static final int RE_AGGREGATE = -1;
61
62    private final ContactMatcher mMatcher = new ContactMatcher();
63
64    /**
65     * Constructor.
66     */
67    public ContactAggregator(ContactsProvider2 contactsProvider,
68            ContactsDatabaseHelper contactsDatabaseHelper,
69            PhotoPriorityResolver photoPriorityResolver, NameSplitter nameSplitter,
70            CommonNicknameCache commonNicknameCache) {
71        super(contactsProvider, contactsDatabaseHelper, photoPriorityResolver, nameSplitter,
72                commonNicknameCache);
73    }
74
75  /**
76     * Given a specific raw contact, finds all matching aggregate contacts and chooses the one
77     * with the highest match score.  If no such contact is found, creates a new contact.
78     */
79    synchronized void aggregateContact(TransactionContext txContext, SQLiteDatabase db,
80            long rawContactId, long accountId, long currentContactId,
81            MatchCandidateList candidates) {
82
83        if (VERBOSE_LOGGING) {
84            Log.v(TAG, "aggregateContact: rid=" + rawContactId + " cid=" + currentContactId);
85        }
86
87        int aggregationMode = RawContacts.AGGREGATION_MODE_DEFAULT;
88
89        Integer aggModeObject = mRawContactsMarkedForAggregation.remove(rawContactId);
90        if (aggModeObject != null) {
91            aggregationMode = aggModeObject;
92        }
93
94        long contactId = -1; // Best matching contact ID.
95        boolean needReaggregate = false;
96
97        final ContactMatcher matcher = new ContactMatcher();
98        final Set<Long> rawContactIdsInSameAccount = new HashSet<Long>();
99        final Set<Long> rawContactIdsInOtherAccount = new HashSet<Long>();
100        if (aggregationMode == RawContacts.AGGREGATION_MODE_DEFAULT) {
101            candidates.clear();
102            matcher.clear();
103
104            contactId = pickBestMatchBasedOnExceptions(db, rawContactId, matcher);
105            if (contactId == -1) {
106
107                // If this is a newly inserted contact or a visible contact, look for
108                // data matches.
109                if (currentContactId == 0
110                        || mDbHelper.isContactInDefaultDirectory(db, currentContactId)) {
111                    contactId = pickBestMatchBasedOnData(db, rawContactId, candidates, matcher);
112                }
113
114                // If we found an best matched contact, find out if the raw contact can be joined
115                // into it
116                if (contactId != -1 && contactId != currentContactId) {
117                    // List all raw contact ID and their account ID mappings in contact
118                    // [contactId] excluding raw_contact [rawContactId].
119
120                    // Based on the mapping, create two sets of raw contact IDs in
121                    // [rawContactAccountId] and not in [rawContactAccountId]. We don't always
122                    // need them, so lazily initialize them.
123                    mSelectionArgs2[0] = String.valueOf(contactId);
124                    mSelectionArgs2[1] = String.valueOf(rawContactId);
125                    final Cursor rawContactsToAccountsCursor = db.rawQuery(
126                            "SELECT " + RawContacts._ID + ", " + RawContactsColumns.ACCOUNT_ID +
127                                    " FROM " + Tables.RAW_CONTACTS +
128                                    " WHERE " + RawContacts.CONTACT_ID + "=?" +
129                                    " AND " + RawContacts._ID + "!=?",
130                            mSelectionArgs2);
131                    try {
132                        rawContactsToAccountsCursor.moveToPosition(-1);
133                        while (rawContactsToAccountsCursor.moveToNext()) {
134                            final long rcId = rawContactsToAccountsCursor.getLong(0);
135                            final long rc_accountId = rawContactsToAccountsCursor.getLong(1);
136                            if (rc_accountId == accountId) {
137                                rawContactIdsInSameAccount.add(rcId);
138                            } else {
139                                rawContactIdsInOtherAccount.add(rcId);
140                            }
141                        }
142                    } finally {
143                        rawContactsToAccountsCursor.close();
144                    }
145                    final int actionCode;
146                    final int totalNumOfRawContactsInCandidate = rawContactIdsInSameAccount.size()
147                            + rawContactIdsInOtherAccount.size();
148                    if (totalNumOfRawContactsInCandidate >= AGGREGATION_CONTACT_SIZE_LIMIT) {
149                        if (VERBOSE_LOGGING) {
150                            Log.v(TAG, "Too many raw contacts (" + totalNumOfRawContactsInCandidate
151                                    + ") in the best matching contact, so skip aggregation");
152                        }
153                        actionCode = KEEP_SEPARATE;
154                    } else {
155                        actionCode = canJoinIntoContact(db, rawContactId,
156                                rawContactIdsInSameAccount, rawContactIdsInOtherAccount);
157                    }
158                    if (actionCode == KEEP_SEPARATE) {
159                        contactId = -1;
160                    } else if (actionCode == RE_AGGREGATE) {
161                        needReaggregate = true;
162                    }
163                }
164            }
165        } else if (aggregationMode == RawContacts.AGGREGATION_MODE_DISABLED) {
166            return;
167        }
168
169        // # of raw_contacts in the [currentContactId] contact excluding the [rawContactId]
170        // raw_contact.
171        long currentContactContentsCount = 0;
172
173        if (currentContactId != 0) {
174            mRawContactCountQuery.bindLong(1, currentContactId);
175            mRawContactCountQuery.bindLong(2, rawContactId);
176            currentContactContentsCount = mRawContactCountQuery.simpleQueryForLong();
177        }
178
179        // If there are no other raw contacts in the current aggregate, we might as well reuse it.
180        // Also, if the aggregation mode is SUSPENDED, we must reuse the same aggregate.
181        if (contactId == -1
182                && currentContactId != 0
183                && (currentContactContentsCount == 0
184                        || aggregationMode == RawContacts.AGGREGATION_MODE_SUSPENDED)) {
185            contactId = currentContactId;
186        }
187
188        if (contactId == currentContactId) {
189            // Aggregation unchanged
190            markAggregated(db, String.valueOf(rawContactId));
191            if (VERBOSE_LOGGING) {
192                Log.v(TAG, "Aggregation unchanged");
193            }
194        } else if (contactId == -1) {
195            // create new contact for [rawContactId]
196            createContactForRawContacts(db, txContext, Sets.newHashSet(rawContactId), null);
197            if (currentContactContentsCount > 0) {
198                updateAggregateData(txContext, currentContactId);
199            }
200            if (VERBOSE_LOGGING) {
201                Log.v(TAG, "create new contact for rid=" + rawContactId);
202            }
203        } else if (needReaggregate) {
204            // re-aggregate
205            final Set<Long> allRawContactIdSet = new HashSet<Long>();
206            allRawContactIdSet.addAll(rawContactIdsInSameAccount);
207            allRawContactIdSet.addAll(rawContactIdsInOtherAccount);
208            // If there is no other raw contacts aggregated with the given raw contact currently,
209            // we might as well reuse it.
210            currentContactId = (currentContactId != 0 && currentContactContentsCount == 0)
211                    ? currentContactId : 0;
212            reAggregateRawContacts(txContext, db, contactId, currentContactId, rawContactId,
213                    allRawContactIdSet);
214            if (VERBOSE_LOGGING) {
215                Log.v(TAG, "Re-aggregating rid=" + rawContactId + " and cid=" + contactId);
216            }
217        } else {
218            // Joining with an existing aggregate
219            if (currentContactContentsCount == 0) {
220                // Delete a previous aggregate if it only contained this raw contact
221                ContactsTableUtil.deleteContact(db, currentContactId);
222
223                mAggregatedPresenceDelete.bindLong(1, currentContactId);
224                mAggregatedPresenceDelete.execute();
225            }
226
227            clearSuperPrimarySetting(db, contactId, rawContactId);
228            setContactIdAndMarkAggregated(rawContactId, contactId);
229            computeAggregateData(db, contactId, mContactUpdate);
230            mContactUpdate.bindLong(ContactReplaceSqlStatement.CONTACT_ID, contactId);
231            mContactUpdate.execute();
232            mDbHelper.updateContactVisible(txContext, contactId);
233            updateAggregatedStatusUpdate(contactId);
234            // Make sure the raw contact does not contribute to the current contact
235            if (currentContactId != 0) {
236                updateAggregateData(txContext, currentContactId);
237            }
238            if (VERBOSE_LOGGING) {
239                Log.v(TAG, "Join rid=" + rawContactId + " with cid=" + contactId);
240            }
241        }
242    }
243
244    /**
245     * Find out which mime-types are shared by raw contact of {@code rawContactId} and raw contacts
246     * of {@code contactId}. Clear the is_super_primary settings for these mime-types.
247     */
248    private void clearSuperPrimarySetting(SQLiteDatabase db, long contactId, long rawContactId) {
249        final String[] args = {String.valueOf(contactId), String.valueOf(rawContactId)};
250
251        // Find out which mime-types exist with is_super_primary=true on both the raw contact of
252        // rawContactId and raw contacts of contactId
253        int index = 0;
254        final StringBuilder mimeTypeCondition = new StringBuilder();
255        mimeTypeCondition.append(" AND " + DataColumns.MIMETYPE_ID + " IN (");
256
257        final Cursor c = db.rawQuery(
258                "SELECT DISTINCT(a." + DataColumns.MIMETYPE_ID + ")" +
259                " FROM (SELECT " + DataColumns.MIMETYPE_ID + " FROM " + Tables.DATA + " WHERE " +
260                        Data.IS_SUPER_PRIMARY + " =1 AND " +
261                        Data.RAW_CONTACT_ID + " IN (SELECT " + RawContacts._ID + " FROM " +
262                        Tables.RAW_CONTACTS + " WHERE " + RawContacts.CONTACT_ID + "=?1)) AS a" +
263                " JOIN  (SELECT " + DataColumns.MIMETYPE_ID + " FROM " + Tables.DATA + " WHERE " +
264                        Data.IS_SUPER_PRIMARY + " =1 AND " +
265                        Data.RAW_CONTACT_ID + "=?2) AS b" +
266                " ON a." + DataColumns.MIMETYPE_ID + "=b." + DataColumns.MIMETYPE_ID,
267                args);
268        try {
269            c.moveToPosition(-1);
270            while (c.moveToNext()) {
271                if (index > 0) {
272                    mimeTypeCondition.append(',');
273                }
274                mimeTypeCondition.append(c.getLong((0)));
275                index++;
276            }
277        } finally {
278            c.close();
279        }
280
281        if (index == 0) {
282            return;
283        }
284
285        // Clear is_super_primary setting for all the mime-types with is_super_primary=true
286        // in both raw contact of rawContactId and raw contacts of contactId
287        String superPrimaryUpdateSql = "UPDATE " + Tables.DATA +
288                " SET " + Data.IS_SUPER_PRIMARY + "=0" +
289                " WHERE (" +  Data.RAW_CONTACT_ID +
290                        " IN (SELECT " + RawContacts._ID +  " FROM " + Tables.RAW_CONTACTS +
291                        " WHERE " + RawContacts.CONTACT_ID + "=?1)" +
292                        " OR " +  Data.RAW_CONTACT_ID + "=?2)";
293
294        mimeTypeCondition.append(')');
295        superPrimaryUpdateSql += mimeTypeCondition.toString();
296        db.execSQL(superPrimaryUpdateSql, args);
297    }
298
299    /**
300     * @return JOIN if the raw contact of {@code rawContactId} can be joined into the existing
301     * contact of {@code contactId}. KEEP_SEPARATE if the raw contact of {@code rawContactId}
302     * cannot be joined into the existing contact of {@code contactId}. RE_AGGREGATE if raw contact
303     * of {@code rawContactId} and all the raw contacts of contact of {@code contactId} need to be
304     * re-aggregated.
305     *
306     * If contact of {@code contactId} doesn't contain any raw contacts from the same account as
307     * raw contact of {@code rawContactId}, join raw contact with contact if there is no identity
308     * mismatch between them on the same namespace, otherwise, keep them separate.
309     *
310     * If contact of {@code contactId} contains raw contacts from the same account as raw contact of
311     * {@code rawContactId}, join raw contact with contact if there's at least one raw contact in
312     * those raw contacts that shares at least one email address, phone number, or identity;
313     * otherwise, re-aggregate raw contact and all the raw contacts of contact.
314     */
315    private int canJoinIntoContact(SQLiteDatabase db, long rawContactId,
316            Set<Long> rawContactIdsInSameAccount, Set<Long> rawContactIdsInOtherAccount ) {
317
318        if (rawContactIdsInSameAccount.isEmpty()) {
319            final String rid = String.valueOf(rawContactId);
320            final String ridsInOtherAccts = TextUtils.join(",", rawContactIdsInOtherAccount);
321            // If there is no identity match between raw contact of [rawContactId] and
322            // any raw contact in other accounts on the same namespace, and there is at least
323            // one identity mismatch exist, keep raw contact separate from contact.
324            if (DatabaseUtils.longForQuery(db, buildIdentityMatchingSql(rid, ridsInOtherAccts,
325                    /* isIdentityMatching =*/ true, /* countOnly =*/ true), null) == 0 &&
326                    DatabaseUtils.longForQuery(db, buildIdentityMatchingSql(rid, ridsInOtherAccts,
327                            /* isIdentityMatching =*/ false, /* countOnly =*/ true), null) > 0) {
328                if (VERBOSE_LOGGING) {
329                    Log.v(TAG, "canJoinIntoContact: no duplicates, but has no matching identity " +
330                            "and has mis-matching identity on the same namespace between rid=" +
331                            rid + " and ridsInOtherAccts=" + ridsInOtherAccts);
332                }
333                return KEEP_SEPARATE; // has identity and identity doesn't match
334            } else {
335                if (VERBOSE_LOGGING) {
336                    Log.v(TAG, "canJoinIntoContact: can join the first raw contact from the same " +
337                            "account without any identity mismatch.");
338                }
339                return JOIN; // no identity or identity match
340            }
341        }
342        if (VERBOSE_LOGGING) {
343            Log.v(TAG, "canJoinIntoContact: " + rawContactIdsInSameAccount.size() +
344                    " duplicate(s) found");
345        }
346
347
348        final Set<Long> rawContactIdSet = new HashSet<Long>();
349        rawContactIdSet.add(rawContactId);
350        if (rawContactIdsInSameAccount.size() > 0 &&
351                isDataMaching(db, rawContactIdSet, rawContactIdsInSameAccount)) {
352            if (VERBOSE_LOGGING) {
353                Log.v(TAG, "canJoinIntoContact: join if there is a data matching found in the " +
354                        "same account");
355            }
356            return JOIN;
357        } else {
358            if (VERBOSE_LOGGING) {
359                Log.v(TAG, "canJoinIntoContact: re-aggregate rid=" + rawContactId +
360                        " with its best matching contact to connected component");
361            }
362            return RE_AGGREGATE;
363        }
364    }
365
366    /**
367     * If there's any identity, email address or a phone number matching between two raw contact
368     * sets.
369     */
370    private boolean isDataMaching(SQLiteDatabase db, Set<Long> rawContactIdSet1,
371            Set<Long> rawContactIdSet2) {
372        final String rawContactIds1 = TextUtils.join(",", rawContactIdSet1);
373        final String rawContactIds2 = TextUtils.join(",", rawContactIdSet2);
374        // First, check for the identity
375        if (isFirstColumnGreaterThanZero(db, buildIdentityMatchingSql(
376                rawContactIds1, rawContactIds2,  /* isIdentityMatching =*/ true,
377                /* countOnly =*/true))) {
378            if (VERBOSE_LOGGING) {
379                Log.v(TAG, "canJoinIntoContact: identity match found between " + rawContactIds1 +
380                        " and " + rawContactIds2);
381            }
382            return true;
383        }
384
385        // Next, check for the email address.
386        if (isFirstColumnGreaterThanZero(db,
387                buildEmailMatchingSql(rawContactIds1, rawContactIds2, true))) {
388            if (VERBOSE_LOGGING) {
389                Log.v(TAG, "canJoinIntoContact: email match found between " + rawContactIds1 +
390                        " and " + rawContactIds2);
391            }
392            return true;
393        }
394
395        // Lastly, the phone number.
396        if (isFirstColumnGreaterThanZero(db,
397                buildPhoneMatchingSql(rawContactIds1, rawContactIds2, true))) {
398            if (VERBOSE_LOGGING) {
399                Log.v(TAG, "canJoinIntoContact: phone match found between " + rawContactIds1 +
400                        " and " + rawContactIds2);
401            }
402            return true;
403        }
404        return false;
405    }
406
407    /**
408     * Re-aggregate rawContact of {@code rawContactId} and all the raw contacts of
409     * {@code existingRawContactIds} into connected components. This only happens when a given
410     * raw contacts cannot be joined with its best matching contacts directly.
411     *
412     *  Two raw contacts are considered connected if they share at least one email address, phone
413     *  number or identity. Create new contact for each connected component except the very first
414     *  one that doesn't contain rawContactId of {@code rawContactId}.
415     */
416    private void reAggregateRawContacts(TransactionContext txContext, SQLiteDatabase db,
417            long contactId, long currentContactId, long rawContactId,
418            Set<Long> existingRawContactIds) {
419        // Find the connected component based on the aggregation exceptions or
420        // identity/email/phone matching for all the raw contacts of [contactId] and the give
421        // raw contact.
422        final Set<Long> allIds = new HashSet<Long>();
423        allIds.add(rawContactId);
424        allIds.addAll(existingRawContactIds);
425        final Set<Set<Long>> connectedRawContactSets = findConnectedRawContacts(db, allIds);
426
427        if (connectedRawContactSets.size() == 1) {
428            // If everything is connected, create one contact with [contactId]
429            createContactForRawContacts(db, txContext, connectedRawContactSets.iterator().next(),
430                    contactId);
431        } else {
432            for (Set<Long> connectedRawContactIds : connectedRawContactSets) {
433                if (connectedRawContactIds.contains(rawContactId)) {
434                    // crate contact for connect component containing [rawContactId], reuse
435                    // [currentContactId] if possible.
436                    createContactForRawContacts(db, txContext, connectedRawContactIds,
437                            currentContactId == 0 ? null : currentContactId);
438                    connectedRawContactSets.remove(connectedRawContactIds);
439                    break;
440                }
441            }
442            // Create new contact for each connected component except the last one. The last one
443            // will reuse [contactId]. Only the last one can reuse [contactId] when all other raw
444            // contacts has already been assigned new contact Id, so that the contact aggregation
445            // stats could be updated correctly.
446            int index = connectedRawContactSets.size();
447            for (Set<Long> connectedRawContactIds : connectedRawContactSets) {
448                if (index > 1) {
449                    createContactForRawContacts(db, txContext, connectedRawContactIds, null);
450                    index--;
451                } else {
452                    createContactForRawContacts(db, txContext, connectedRawContactIds, contactId);
453                }
454            }
455        }
456    }
457
458    /**
459     * Ensures that automatic aggregation rules are followed after a contact
460     * becomes visible or invisible. Specifically, consider this case: there are
461     * three contacts named Foo. Two of them come from account A1 and one comes
462     * from account A2. The aggregation rules say that in this case none of the
463     * three Foo's should be aggregated: two of them are in the same account, so
464     * they don't get aggregated; the third has two affinities, so it does not
465     * join either of them.
466     * <p>
467     * Consider what happens if one of the "Foo"s from account A1 becomes
468     * invisible. Nothing stands in the way of aggregating the other two
469     * anymore, so they should get joined.
470     * <p>
471     * What if the invisible "Foo" becomes visible after that? We should split the
472     * aggregate between the other two.
473     */
474    public void updateAggregationAfterVisibilityChange(long contactId) {
475        SQLiteDatabase db = mDbHelper.getWritableDatabase();
476        boolean visible = mDbHelper.isContactInDefaultDirectory(db, contactId);
477        if (visible) {
478            markContactForAggregation(db, contactId);
479        } else {
480            // Find all contacts that _could be_ aggregated with this one and
481            // rerun aggregation for all of them
482            mSelectionArgs1[0] = String.valueOf(contactId);
483            Cursor cursor = db.query(RawContactIdQuery.TABLE, RawContactIdQuery.COLUMNS,
484                    RawContactIdQuery.SELECTION, mSelectionArgs1, null, null, null);
485            try {
486                while (cursor.moveToNext()) {
487                    long rawContactId = cursor.getLong(RawContactIdQuery.RAW_CONTACT_ID);
488                    mMatcher.clear();
489
490                    updateMatchScoresBasedOnIdentityMatch(db, rawContactId, mMatcher);
491                    updateMatchScoresBasedOnNameMatches(db, rawContactId, mMatcher);
492                    List<MatchScore> bestMatches =
493                            mMatcher.pickBestMatches(ContactMatcher.SCORE_THRESHOLD_PRIMARY);
494                    for (MatchScore matchScore : bestMatches) {
495                        markContactForAggregation(db, matchScore.getContactId());
496                    }
497
498                    mMatcher.clear();
499                    updateMatchScoresBasedOnEmailMatches(db, rawContactId, mMatcher);
500                    updateMatchScoresBasedOnPhoneMatches(db, rawContactId, mMatcher);
501                    bestMatches =
502                            mMatcher.pickBestMatches(ContactMatcher.SCORE_THRESHOLD_SECONDARY);
503                    for (MatchScore matchScore : bestMatches) {
504                        markContactForAggregation(db, matchScore.getContactId());
505                    }
506                }
507            } finally {
508                cursor.close();
509            }
510        }
511    }
512
513    /**
514     * Updates the contact ID for the specified contact and marks the raw contact as aggregated.
515     */
516    private void setContactIdAndMarkAggregated(long rawContactId, long contactId) {
517        mContactIdAndMarkAggregatedUpdate.bindLong(1, contactId);
518        mContactIdAndMarkAggregatedUpdate.bindLong(2, rawContactId);
519        mContactIdAndMarkAggregatedUpdate.execute();
520    }
521
522    interface AggregateExceptionQuery {
523        String TABLE = Tables.AGGREGATION_EXCEPTIONS
524            + " JOIN raw_contacts raw_contacts1 "
525                    + " ON (agg_exceptions.raw_contact_id1 = raw_contacts1._id) "
526            + " JOIN raw_contacts raw_contacts2 "
527                    + " ON (agg_exceptions.raw_contact_id2 = raw_contacts2._id) ";
528
529        String[] COLUMNS = {
530            AggregationExceptions.TYPE,
531            AggregationExceptions.RAW_CONTACT_ID1,
532            "raw_contacts1." + RawContacts.CONTACT_ID,
533            "raw_contacts1." + RawContactsColumns.AGGREGATION_NEEDED,
534            "raw_contacts2." + RawContacts.CONTACT_ID,
535            "raw_contacts2." + RawContactsColumns.AGGREGATION_NEEDED,
536        };
537
538        int TYPE = 0;
539        int RAW_CONTACT_ID1 = 1;
540        int CONTACT_ID1 = 2;
541        int AGGREGATION_NEEDED_1 = 3;
542        int CONTACT_ID2 = 4;
543        int AGGREGATION_NEEDED_2 = 5;
544    }
545
546    /**
547     * Computes match scores based on exceptions entered by the user: always match and never match.
548     * Returns the aggregate contact with the always match exception if any.
549     */
550    private long pickBestMatchBasedOnExceptions(SQLiteDatabase db, long rawContactId,
551            ContactMatcher matcher) {
552        if (!mAggregationExceptionIdsValid) {
553            prefetchAggregationExceptionIds(db);
554        }
555
556        // If there are no aggregation exceptions involving this raw contact, there is no need to
557        // run a query and we can just return -1, which stands for "nothing found"
558        if (!mAggregationExceptionIds.contains(rawContactId)) {
559            return -1;
560        }
561
562        final Cursor c = db.query(AggregateExceptionQuery.TABLE,
563                AggregateExceptionQuery.COLUMNS,
564                AggregationExceptions.RAW_CONTACT_ID1 + "=" + rawContactId
565                        + " OR " + AggregationExceptions.RAW_CONTACT_ID2 + "=" + rawContactId,
566                null, null, null, null);
567
568        try {
569            while (c.moveToNext()) {
570                int type = c.getInt(AggregateExceptionQuery.TYPE);
571                long rawContactId1 = c.getLong(AggregateExceptionQuery.RAW_CONTACT_ID1);
572                long contactId = -1;
573                if (rawContactId == rawContactId1) {
574                    if (c.getInt(AggregateExceptionQuery.AGGREGATION_NEEDED_2) == 0
575                            && !c.isNull(AggregateExceptionQuery.CONTACT_ID2)) {
576                        contactId = c.getLong(AggregateExceptionQuery.CONTACT_ID2);
577                    }
578                } else {
579                    if (c.getInt(AggregateExceptionQuery.AGGREGATION_NEEDED_1) == 0
580                            && !c.isNull(AggregateExceptionQuery.CONTACT_ID1)) {
581                        contactId = c.getLong(AggregateExceptionQuery.CONTACT_ID1);
582                    }
583                }
584                if (contactId != -1) {
585                    if (type == AggregationExceptions.TYPE_KEEP_TOGETHER) {
586                        matcher.keepIn(contactId);
587                    } else {
588                        matcher.keepOut(contactId);
589                    }
590                }
591            }
592        } finally {
593            c.close();
594        }
595
596        return matcher.pickBestMatch(MatchScore.MAX_SCORE, true);
597    }
598
599    /**
600     * Picks the best matching contact based on matches between data elements.  It considers
601     * name match to be primary and phone, email etc matches to be secondary.  A good primary
602     * match triggers aggregation, while a good secondary match only triggers aggregation in
603     * the absence of a strong primary mismatch.
604     * <p>
605     * Consider these examples:
606     * <p>
607     * John Doe with phone number 111-111-1111 and Jon Doe with phone number 111-111-1111 should
608     * be aggregated (same number, similar names).
609     * <p>
610     * John Doe with phone number 111-111-1111 and Deborah Doe with phone number 111-111-1111 should
611     * not be aggregated (same number, different names).
612     */
613    private long pickBestMatchBasedOnData(SQLiteDatabase db, long rawContactId,
614            MatchCandidateList candidates, ContactMatcher matcher) {
615
616        // Find good matches based on name alone
617        long bestMatch = updateMatchScoresBasedOnDataMatches(db, rawContactId, matcher);
618        if (bestMatch == ContactMatcher.MULTIPLE_MATCHES) {
619            // We found multiple matches on the name - do not aggregate because of the ambiguity
620            return -1;
621        } else if (bestMatch == -1) {
622            // We haven't found a good match on name, see if we have any matches on phone, email etc
623            bestMatch = pickBestMatchBasedOnSecondaryData(db, rawContactId, candidates, matcher);
624            if (bestMatch == ContactMatcher.MULTIPLE_MATCHES) {
625                return -1;
626            }
627        }
628
629        return bestMatch;
630    }
631
632
633    /**
634     * Picks the best matching contact based on secondary data matches.  The method loads
635     * structured names for all candidate contacts and recomputes match scores using approximate
636     * matching.
637     */
638    private long pickBestMatchBasedOnSecondaryData(SQLiteDatabase db,
639            long rawContactId, MatchCandidateList candidates, ContactMatcher matcher) {
640        List<Long> secondaryContactIds = matcher.prepareSecondaryMatchCandidates(
641                ContactMatcher.SCORE_THRESHOLD_PRIMARY);
642        if (secondaryContactIds == null || secondaryContactIds.size() > SECONDARY_HIT_LIMIT) {
643            return -1;
644        }
645
646        loadNameMatchCandidates(db, rawContactId, candidates, true);
647
648        mSb.setLength(0);
649        mSb.append(RawContacts.CONTACT_ID).append(" IN (");
650        for (int i = 0; i < secondaryContactIds.size(); i++) {
651            if (i != 0) {
652                mSb.append(',');
653            }
654            mSb.append(secondaryContactIds.get(i));
655        }
656
657        // We only want to compare structured names to structured names
658        // at this stage, we need to ignore all other sources of name lookup data.
659        mSb.append(") AND " + STRUCTURED_NAME_BASED_LOOKUP_SQL);
660
661        matchAllCandidates(db, mSb.toString(), candidates, matcher,
662                ContactMatcher.MATCHING_ALGORITHM_CONSERVATIVE, null);
663
664        return matcher.pickBestMatch(ContactMatcher.SCORE_THRESHOLD_SECONDARY, false);
665    }
666
667    /**
668     * Computes scores for contacts that have matching data rows.
669     */
670    private long updateMatchScoresBasedOnDataMatches(SQLiteDatabase db, long rawContactId,
671            ContactMatcher matcher) {
672
673        updateMatchScoresBasedOnIdentityMatch(db, rawContactId, matcher);
674        updateMatchScoresBasedOnNameMatches(db, rawContactId, matcher);
675        long bestMatch = matcher.pickBestMatch(ContactMatcher.SCORE_THRESHOLD_PRIMARY, false);
676        if (bestMatch != -1) {
677            return bestMatch;
678        }
679
680        updateMatchScoresBasedOnEmailMatches(db, rawContactId, matcher);
681        updateMatchScoresBasedOnPhoneMatches(db, rawContactId, matcher);
682
683        return -1;
684    }
685
686    private interface IdentityLookupMatchQuery {
687        final String TABLE = Tables.DATA + " dataA"
688                + " JOIN " + Tables.DATA + " dataB" +
689                " ON (dataA." + Identity.NAMESPACE + "=dataB." + Identity.NAMESPACE +
690                " AND dataA." + Identity.IDENTITY + "=dataB." + Identity.IDENTITY + ")"
691                + " JOIN " + Tables.RAW_CONTACTS +
692                " ON (dataB." + Data.RAW_CONTACT_ID + " = "
693                + Tables.RAW_CONTACTS + "." + RawContacts._ID + ")";
694
695        final String SELECTION = "dataA." + Data.RAW_CONTACT_ID + "=?1"
696                + " AND dataA." + DataColumns.MIMETYPE_ID + "=?2"
697                + " AND dataA." + Identity.NAMESPACE + " NOT NULL"
698                + " AND dataA." + Identity.IDENTITY + " NOT NULL"
699                + " AND dataB." + DataColumns.MIMETYPE_ID + "=?2"
700                + " AND " + RawContactsColumns.AGGREGATION_NEEDED + "=0"
701                + " AND " + RawContacts.CONTACT_ID + " IN " + Tables.DEFAULT_DIRECTORY;
702
703        final String[] COLUMNS = new String[] {
704            RawContacts.CONTACT_ID
705        };
706
707        int CONTACT_ID = 0;
708    }
709
710    /**
711     * Finds contacts with exact identity matches to the the specified raw contact.
712     */
713    private void updateMatchScoresBasedOnIdentityMatch(SQLiteDatabase db, long rawContactId,
714            ContactMatcher matcher) {
715        mSelectionArgs2[0] = String.valueOf(rawContactId);
716        mSelectionArgs2[1] = String.valueOf(mMimeTypeIdIdentity);
717        Cursor c = db.query(IdentityLookupMatchQuery.TABLE, IdentityLookupMatchQuery.COLUMNS,
718                IdentityLookupMatchQuery.SELECTION,
719                mSelectionArgs2, RawContacts.CONTACT_ID, null, null);
720        try {
721            while (c.moveToNext()) {
722                final long contactId = c.getLong(IdentityLookupMatchQuery.CONTACT_ID);
723                matcher.matchIdentity(contactId);
724            }
725        } finally {
726            c.close();
727        }
728
729    }
730
731    private interface NameLookupMatchQuery {
732        String TABLE = Tables.NAME_LOOKUP + " nameA"
733                + " JOIN " + Tables.NAME_LOOKUP + " nameB" +
734                " ON (" + "nameA." + NameLookupColumns.NORMALIZED_NAME + "="
735                        + "nameB." + NameLookupColumns.NORMALIZED_NAME + ")"
736                + " JOIN " + Tables.RAW_CONTACTS +
737                " ON (nameB." + NameLookupColumns.RAW_CONTACT_ID + " = "
738                        + Tables.RAW_CONTACTS + "." + RawContacts._ID + ")";
739
740        String SELECTION = "nameA." + NameLookupColumns.RAW_CONTACT_ID + "=?"
741                + " AND " + RawContactsColumns.AGGREGATION_NEEDED + "=0"
742                + " AND " + RawContacts.CONTACT_ID + " IN " + Tables.DEFAULT_DIRECTORY;
743
744        String[] COLUMNS = new String[] {
745            RawContacts.CONTACT_ID,
746            "nameA." + NameLookupColumns.NORMALIZED_NAME,
747            "nameA." + NameLookupColumns.NAME_TYPE,
748            "nameB." + NameLookupColumns.NAME_TYPE,
749        };
750
751        int CONTACT_ID = 0;
752        int NAME = 1;
753        int NAME_TYPE_A = 2;
754        int NAME_TYPE_B = 3;
755    }
756
757    /**
758     * Finds contacts with names matching the name of the specified raw contact.
759     */
760    private void updateMatchScoresBasedOnNameMatches(SQLiteDatabase db, long rawContactId,
761            ContactMatcher matcher) {
762        mSelectionArgs1[0] = String.valueOf(rawContactId);
763        Cursor c = db.query(NameLookupMatchQuery.TABLE, NameLookupMatchQuery.COLUMNS,
764                NameLookupMatchQuery.SELECTION,
765                mSelectionArgs1, null, null, null, PRIMARY_HIT_LIMIT_STRING);
766        try {
767            while (c.moveToNext()) {
768                long contactId = c.getLong(NameLookupMatchQuery.CONTACT_ID);
769                String name = c.getString(NameLookupMatchQuery.NAME);
770                int nameTypeA = c.getInt(NameLookupMatchQuery.NAME_TYPE_A);
771                int nameTypeB = c.getInt(NameLookupMatchQuery.NAME_TYPE_B);
772                matcher.matchName(contactId, nameTypeA, name,
773                        nameTypeB, name, ContactMatcher.MATCHING_ALGORITHM_EXACT);
774                if (nameTypeA == NameLookupType.NICKNAME &&
775                        nameTypeB == NameLookupType.NICKNAME) {
776                    matcher.updateScoreWithNicknameMatch(contactId);
777                }
778            }
779        } finally {
780            c.close();
781        }
782    }
783
784    private void updateMatchScoresBasedOnEmailMatches(SQLiteDatabase db, long rawContactId,
785            ContactMatcher matcher) {
786        mSelectionArgs2[0] = String.valueOf(rawContactId);
787        mSelectionArgs2[1] = String.valueOf(mMimeTypeIdEmail);
788        Cursor c = db.query(EmailLookupQuery.TABLE, EmailLookupQuery.COLUMNS,
789                EmailLookupQuery.SELECTION,
790                mSelectionArgs2, null, null, null, SECONDARY_HIT_LIMIT_STRING);
791        try {
792            while (c.moveToNext()) {
793                long contactId = c.getLong(EmailLookupQuery.CONTACT_ID);
794                matcher.updateScoreWithEmailMatch(contactId);
795            }
796        } finally {
797            c.close();
798        }
799    }
800
801    private void updateMatchScoresBasedOnPhoneMatches(SQLiteDatabase db, long rawContactId,
802            ContactMatcher matcher) {
803        mSelectionArgs2[0] = String.valueOf(rawContactId);
804        mSelectionArgs2[1] = mDbHelper.getUseStrictPhoneNumberComparisonParameter();
805        Cursor c = db.query(PhoneLookupQuery.TABLE, PhoneLookupQuery.COLUMNS,
806                PhoneLookupQuery.SELECTION,
807                mSelectionArgs2, null, null, null, SECONDARY_HIT_LIMIT_STRING);
808        try {
809            while (c.moveToNext()) {
810                long contactId = c.getLong(PhoneLookupQuery.CONTACT_ID);
811                matcher.updateScoreWithPhoneNumberMatch(contactId);
812            }
813        } finally {
814            c.close();
815        }
816    }
817
818    /**
819     * Loads name lookup rows for approximate name matching and updates match scores based on that
820     * data.
821     */
822    private void lookupApproximateNameMatches(SQLiteDatabase db, MatchCandidateList candidates,
823            ContactMatcher matcher) {
824        HashSet<String> firstLetters = new HashSet<String>();
825        for (int i = 0; i < candidates.mCount; i++) {
826            final NameMatchCandidate candidate = candidates.mList.get(i);
827            if (candidate.mName.length() >= 2) {
828                String firstLetter = candidate.mName.substring(0, 2);
829                if (!firstLetters.contains(firstLetter)) {
830                    firstLetters.add(firstLetter);
831                    final String selection = "(" + NameLookupColumns.NORMALIZED_NAME + " GLOB '"
832                            + firstLetter + "*') AND "
833                            + "(" + NameLookupColumns.NAME_TYPE + " IN("
834                                    + NameLookupType.NAME_COLLATION_KEY + ","
835                                    + NameLookupType.EMAIL_BASED_NICKNAME + ","
836                                    + NameLookupType.NICKNAME + ")) AND "
837                            + RawContacts.CONTACT_ID + " IN " + Tables.DEFAULT_DIRECTORY;
838                    matchAllCandidates(db, selection, candidates, matcher,
839                            ContactMatcher.MATCHING_ALGORITHM_APPROXIMATE,
840                            String.valueOf(FIRST_LETTER_SUGGESTION_HIT_LIMIT));
841                }
842            }
843        }
844    }
845
846    private interface ContactNameLookupQuery {
847        String TABLE = Tables.NAME_LOOKUP_JOIN_RAW_CONTACTS;
848
849        String[] COLUMNS = new String[] {
850                RawContacts.CONTACT_ID,
851                NameLookupColumns.NORMALIZED_NAME,
852                NameLookupColumns.NAME_TYPE
853        };
854
855        int CONTACT_ID = 0;
856        int NORMALIZED_NAME = 1;
857        int NAME_TYPE = 2;
858    }
859
860    /**
861     * Loads all candidate rows from the name lookup table and updates match scores based
862     * on that data.
863     */
864    private void matchAllCandidates(SQLiteDatabase db, String selection,
865            MatchCandidateList candidates, ContactMatcher matcher, int algorithm, String limit) {
866        final Cursor c = db.query(ContactNameLookupQuery.TABLE, ContactNameLookupQuery.COLUMNS,
867                selection, null, null, null, null, limit);
868
869        try {
870            while (c.moveToNext()) {
871                Long contactId = c.getLong(ContactNameLookupQuery.CONTACT_ID);
872                String name = c.getString(ContactNameLookupQuery.NORMALIZED_NAME);
873                int nameType = c.getInt(ContactNameLookupQuery.NAME_TYPE);
874
875                // Note the N^2 complexity of the following fragment. This is not a huge concern
876                // since the number of candidates is very small and in general secondary hits
877                // in the absence of primary hits are rare.
878                for (int i = 0; i < candidates.mCount; i++) {
879                    NameMatchCandidate candidate = candidates.mList.get(i);
880                    matcher.matchName(contactId, candidate.mLookupType, candidate.mName,
881                            nameType, name, algorithm);
882                }
883            }
884        } finally {
885            c.close();
886        }
887    }
888
889    /**
890     * Finds contacts with data matches and returns a list of {@link MatchScore}'s in the
891     * descending order of match score.
892     * @param parameters
893     */
894     protected List<MatchScore> findMatchingContacts(final SQLiteDatabase db, long contactId,
895            ArrayList<AggregationSuggestionParameter> parameters) {
896
897        MatchCandidateList candidates = new MatchCandidateList();
898        ContactMatcher matcher = new ContactMatcher();
899
900        // Don't aggregate a contact with itself
901        matcher.keepOut(contactId);
902
903        if (parameters == null || parameters.size() == 0) {
904            final Cursor c = db.query(RawContactIdQuery.TABLE, RawContactIdQuery.COLUMNS,
905                    RawContacts.CONTACT_ID + "=" + contactId, null, null, null, null);
906            try {
907                while (c.moveToNext()) {
908                    long rawContactId = c.getLong(RawContactIdQuery.RAW_CONTACT_ID);
909                    updateMatchScoresForSuggestionsBasedOnDataMatches(db, rawContactId, candidates,
910                            matcher);
911                }
912            } finally {
913                c.close();
914            }
915        } else {
916            updateMatchScoresForSuggestionsBasedOnDataMatches(db, candidates,
917                    matcher, parameters);
918        }
919
920        return matcher.pickBestMatches(ContactMatcher.SCORE_THRESHOLD_SUGGEST);
921    }
922
923    /**
924     * Computes scores for contacts that have matching data rows.
925     */
926    private void updateMatchScoresForSuggestionsBasedOnDataMatches(SQLiteDatabase db,
927            long rawContactId, MatchCandidateList candidates, ContactMatcher matcher) {
928
929        updateMatchScoresBasedOnIdentityMatch(db, rawContactId, matcher);
930        updateMatchScoresBasedOnNameMatches(db, rawContactId, matcher);
931        updateMatchScoresBasedOnEmailMatches(db, rawContactId, matcher);
932        updateMatchScoresBasedOnPhoneMatches(db, rawContactId, matcher);
933        loadNameMatchCandidates(db, rawContactId, candidates, false);
934        lookupApproximateNameMatches(db, candidates, matcher);
935    }
936
937    private void updateMatchScoresForSuggestionsBasedOnDataMatches(SQLiteDatabase db,
938            MatchCandidateList candidates, ContactMatcher matcher,
939            ArrayList<AggregationSuggestionParameter> parameters) {
940        for (AggregationSuggestionParameter parameter : parameters) {
941            if (AggregationSuggestions.PARAMETER_MATCH_NAME.equals(parameter.kind)) {
942                updateMatchScoresBasedOnNameMatches(db, parameter.value, candidates, matcher);
943            }
944
945            // TODO: add support for other parameter kinds
946        }
947    }
948}
949