12fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood/* 22fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * Copyright (C) 2010 The Android Open Source Project 32fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * 42fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * Licensed under the Apache License, Version 2.0 (the "License"); 52fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * you may not use this file except in compliance with the License. 62fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * You may obtain a copy of the License at 72fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * 82fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * http://www.apache.org/licenses/LICENSE-2.0 92fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * 102fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * Unless required by applicable law or agreed to in writing, software 112fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * distributed under the License is distributed on an "AS IS" BASIS, 122fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 132fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * See the License for the specific language governing permissions and 142fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * limitations under the License. 152fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood */ 162fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 172fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodpackage com.android.quicksearchbox; 182fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 192fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport com.android.quicksearchbox.util.LevenshteinDistance; 20f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringertimport com.android.quicksearchbox.util.LevenshteinDistance.Token; 212fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport com.google.common.annotations.VisibleForTesting; 222fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 232fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport android.text.SpannableString; 242fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport android.text.Spanned; 252fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodimport android.util.Log; 262fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 272fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood/** 282fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * Suggestion formatter using the Levenshtein distance (minumum edit distance) to calculate the 292fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * formatting. 302fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood */ 312fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwoodpublic class LevenshteinSuggestionFormatter extends SuggestionFormatter { 322fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood private static final boolean DBG = false; 332fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood private static final String TAG = "QSB.LevenshteinSuggestionFormatter"; 342fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 35132a8afcb4970d1b783a6dba7944dc0dd5063101Mathew Inwood public LevenshteinSuggestionFormatter(TextAppearanceFactory spanFactory) { 36132a8afcb4970d1b783a6dba7944dc0dd5063101Mathew Inwood super(spanFactory); 37132a8afcb4970d1b783a6dba7944dc0dd5063101Mathew Inwood } 38132a8afcb4970d1b783a6dba7944dc0dd5063101Mathew Inwood 392fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood @Override 40f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert public Spanned formatSuggestion(String query, String suggestion) { 412fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood if (DBG) Log.d(TAG, "formatSuggestion('" + query + "', '" + suggestion + "')"); 422fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood query = normalizeQuery(query); 43fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood final Token[] queryTokens = tokenize(query); 44fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood final Token[] suggestionTokens = tokenize(suggestion); 45fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood final int[] matches = findMatches(queryTokens, suggestionTokens); 462fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood if (DBG){ 472fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood Log.d(TAG, "source = " + queryTokens); 482fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood Log.d(TAG, "target = " + suggestionTokens); 492fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood Log.d(TAG, "matches = " + matches); 502fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 51fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood final SpannableString str = new SpannableString(suggestion); 522fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 53fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood final int matchesLen = matches.length; 54fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood for (int i = 0; i < matchesLen; ++i) { 55fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood final Token t = suggestionTokens[i]; 562fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood int sourceLen = 0; 57fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood int thisMatch = matches[i]; 58fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood if (thisMatch >= 0) { 59fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood sourceLen = queryTokens[thisMatch].length(); 602fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 61fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood applySuggestedTextStyle(str, t.mStart + sourceLen, t.mEnd); 62fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood applyQueryTextStyle(str, t.mStart, t.mStart + sourceLen); 632fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 642fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 652fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood return str; 662fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 672fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 68f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert private String normalizeQuery(String query) { 69f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert return query.toLowerCase(); 702fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 712fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 722fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood /** 732fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * Finds which tokens in the target match tokens in the source. 742fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * 752fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * @param source List of source tokens (i.e. user query) 762fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * @param target List of target tokens (i.e. suggestion) 772fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * @return The indices into source which target tokens correspond to. A non-negative value n at 782fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * position i means that target token i matches source token n. A negative value means that 792fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood * the target token i does not match any source token. 802fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood */ 812fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood @VisibleForTesting 82fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood int[] findMatches(Token[] source, Token[] target) { 83f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert final LevenshteinDistance table = new LevenshteinDistance(source, target); 842fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood table.calculate(); 85fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood final int targetLen = target.length; 86fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood final int[] result = new int[targetLen]; 872fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood LevenshteinDistance.EditOperation[] ops = table.getTargetOperations(); 88fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood for (int i = 0; i < targetLen; ++i) { 892fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood if (ops[i].getType() == LevenshteinDistance.EDIT_UNCHANGED) { 902fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood result[i] = ops[i].getPosition(); 912fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } else { 922fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood result[i] = -1; 932fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 942fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 952fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood return result; 962fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 972fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 982fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood @VisibleForTesting 99f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert Token[] tokenize(final String seq) { 1002fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood int pos = 0; 101fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood final int len = seq.length(); 102f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert final char[] chars = seq.toCharArray(); 103f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert // There can't be more tokens than characters, make an array that is large enough 104f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert Token[] tokens = new Token[len]; 105f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert int tokenCount = 0; 106fbbf9be564b7675b2986dea38babfcbe55e5694bMathew Inwood while (pos < len) { 107f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert while (pos < len && (chars[pos] == ' ' || chars[pos] == '\t')) { 1082fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood pos++; 1092fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 1102fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood int start = pos; 111f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert while (pos < len && !(chars[pos] == ' ' || chars[pos] == '\t')) { 1122fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood pos++; 1132fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 1142fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood int end = pos; 1152fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood if (start != end) { 116f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert tokens[tokenCount++] = new Token(chars, start, end); 1172fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 1182fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 119f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert // Create a token array of the right size and return 120f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert Token[] ret = new Token[tokenCount]; 121f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert System.arraycopy(tokens, 0, ret, 0, tokenCount); 122f16bea9765ee9838d316655d38cb51b72a3b4acbBjorn Bringert return ret; 1232fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood } 1242fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood 1252fb3a129925a42e72944b836e85a1a2d55a0d950Mathew Inwood} 126