12fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood/*
22fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * Copyright (C) 2010 The Android Open Source Project
32fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood *
42fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * Licensed under the Apache License, Version 2.0 (the "License");
52fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * you may not use this file except in compliance with the License.
62fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * You may obtain a copy of the License at
72fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood *
82fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood *      http://www.apache.org/licenses/LICENSE-2.0
92fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood *
102fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * Unless required by applicable law or agreed to in writing, software
112fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * distributed under the License is distributed on an "AS IS" BASIS,
122fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
132fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * See the License for the specific language governing permissions and
142fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * limitations under the License.
152fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood */
162fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
172fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodpackage com.android.quicksearchbox;
182fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
192fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport com.android.quicksearchbox.util.LevenshteinDistance;
20f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringertimport com.android.quicksearchbox.util.LevenshteinDistance.Token;
212fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport com.google.common.annotations.VisibleForTesting;
222fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
232fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport android.text.SpannableString;
242fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport android.text.Spanned;
252fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport android.util.Log;
262fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
272fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood/**
282fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * Suggestion formatter using the Levenshtein distance (minumum edit distance) to calculate the
292fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * formatting.
302fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood */
312fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodpublic class LevenshteinSuggestionFormatter extends SuggestionFormatter {
322fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    private static final boolean DBG = false;
332fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    private static final String TAG = "QSB.LevenshteinSuggestionFormatter";
342fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
35132a8afcb4970d1b783a6dba7944dc0dd5063101Mathew Inwood    public LevenshteinSuggestionFormatter(TextAppearanceFactory spanFactory) {
36132a8afcb4970d1b783a6dba7944dc0dd5063101Mathew Inwood        super(spanFactory);
37132a8afcb4970d1b783a6dba7944dc0dd5063101Mathew Inwood    }
38132a8afcb4970d1b783a6dba7944dc0dd5063101Mathew Inwood
392fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    @Override
40f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert    public Spanned formatSuggestion(String query, String suggestion) {
412fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        if (DBG) Log.d(TAG, "formatSuggestion('" + query + "', '" + suggestion + "')");
422fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        query = normalizeQuery(query);
43fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        final Token[] queryTokens = tokenize(query);
44fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        final Token[] suggestionTokens = tokenize(suggestion);
45fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        final int[] matches = findMatches(queryTokens, suggestionTokens);
462fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        if (DBG){
472fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            Log.d(TAG, "source = " + queryTokens);
482fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            Log.d(TAG, "target = " + suggestionTokens);
492fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            Log.d(TAG, "matches = " + matches);
502fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        }
51fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        final SpannableString str = new SpannableString(suggestion);
522fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
53fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        final int matchesLen = matches.length;
54fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        for (int i = 0; i < matchesLen; ++i) {
55fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood            final Token t = suggestionTokens[i];
562fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            int sourceLen = 0;
57fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood            int thisMatch = matches[i];
58fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood            if (thisMatch >= 0) {
59fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood                sourceLen = queryTokens[thisMatch].length();
602fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            }
61fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood            applySuggestedTextStyle(str, t.mStart + sourceLen, t.mEnd);
62fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood            applyQueryTextStyle(str, t.mStart, t.mStart + sourceLen);
632fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        }
642fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
652fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        return str;
662fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    }
672fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
68f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert    private String normalizeQuery(String query) {
69f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        return query.toLowerCase();
702fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    }
712fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
722fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    /**
732fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood     * Finds which tokens in the target match tokens in the source.
742fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood     *
752fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood     * @param source List of source tokens (i.e. user query)
762fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood     * @param target List of target tokens (i.e. suggestion)
772fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood     * @return The indices into source which target tokens correspond to. A non-negative value n at
782fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood     *      position i means that target token i matches source token n. A negative value means that
792fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood     *      the target token i does not match any source token.
802fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood     */
812fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    @VisibleForTesting
82fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood    int[] findMatches(Token[] source, Token[] target) {
83f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        final LevenshteinDistance table = new LevenshteinDistance(source, target);
842fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        table.calculate();
85fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        final int targetLen = target.length;
86fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        final int[] result = new int[targetLen];
872fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        LevenshteinDistance.EditOperation[] ops = table.getTargetOperations();
88fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        for (int i = 0; i < targetLen; ++i) {
892fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            if (ops[i].getType() == LevenshteinDistance.EDIT_UNCHANGED) {
902fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood                result[i] = ops[i].getPosition();
912fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            } else {
922fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood                result[i] = -1;
932fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            }
942fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        }
952fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        return result;
962fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    }
972fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
982fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    @VisibleForTesting
99f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert    Token[] tokenize(final String seq) {
1002fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        int pos = 0;
101fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        final int len = seq.length();
102f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        final char[] chars = seq.toCharArray();
103f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        // There can't be more tokens than characters, make an array that is large enough
104f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        Token[] tokens = new Token[len];
105f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        int tokenCount = 0;
106fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood        while (pos < len) {
107f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert            while (pos < len && (chars[pos] == ' ' || chars[pos] == '\t')) {
1082fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood                pos++;
1092fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            }
1102fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            int start = pos;
111f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert            while (pos < len && !(chars[pos] == ' ' || chars[pos] == '\t')) {
1122fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood                pos++;
1132fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            }
1142fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            int end = pos;
1152fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            if (start != end) {
116f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert                tokens[tokenCount++] = new Token(chars, start, end);
1172fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood            }
1182fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood        }
119f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        // Create a token array of the right size and return
120f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        Token[] ret = new Token[tokenCount];
121f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        System.arraycopy(tokens, 0, ret, 0, tokenCount);
122f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert        return ret;
1232fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood    }
1242fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood
1252fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood}
126