LevenshteinSuggestionFormatter.java revision 2fb3a129925a42e72944b836e85a1a2d55a0d950
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.quicksearchbox; 18 19import com.android.quicksearchbox.util.LevenshteinDistance; 20import com.google.common.annotations.VisibleForTesting; 21 22import android.text.SpannableString; 23import android.text.Spanned; 24import android.util.Log; 25 26import java.util.ArrayList; 27import java.util.List; 28 29/** 30 * Suggestion formatter using the Levenshtein distance (minumum edit distance) to calculate the 31 * formatting. 32 */ 33public class LevenshteinSuggestionFormatter extends SuggestionFormatter { 34 private static final boolean DBG = false; 35 private static final String TAG = "QSB.LevenshteinSuggestionFormatter"; 36 37 @Override 38 public Spanned formatSuggestion(CharSequence query, CharSequence suggestion) { 39 if (DBG) Log.d(TAG, "formatSuggestion('" + query + "', '" + suggestion + "')"); 40 query = normalizeQuery(query); 41 List<Token> queryTokens = tokenize(query); 42 List<Token> suggestionTokens = tokenize(suggestion); 43 int[] matches = findMatches(queryTokens, suggestionTokens); 44 if (DBG){ 45 Log.d(TAG, "source = " + queryTokens); 46 Log.d(TAG, "target = " + suggestionTokens); 47 Log.d(TAG, "matches = " + matches); 48 } 49 SpannableString str = new SpannableString(suggestion); 50 51 for (int i = 0; i < matches.length; ++i) { 52 Token t = suggestionTokens.get(i); 53 int sourceLen = 0; 54 if (matches[i] >= 0) { 55 sourceLen = queryTokens.get(matches[i]).getLength(); 56 } 57 applySuggestedTextStyle(str, t.getStart() + sourceLen, t.getEnd()); 58 applyQueryTextStyle(str, t.getStart(), t.getStart() + sourceLen); 59 } 60 61 return str; 62 } 63 64 private CharSequence normalizeQuery(CharSequence query) { 65 return query.toString().toLowerCase(); 66 } 67 68 /** 69 * Finds which tokens in the target match tokens in the source. 70 * 71 * @param source List of source tokens (i.e. user query) 72 * @param target List of target tokens (i.e. suggestion) 73 * @return The indices into source which target tokens correspond to. A non-negative value n at 74 * position i means that target token i matches source token n. A negative value means that 75 * the target token i does not match any source token. 76 */ 77 @VisibleForTesting 78 int[] findMatches(List<Token> source, List<Token> target) { 79 LevenshteinDistance<Token> table = new LevenshteinDistance<Token>(source, target) { 80 @Override 81 protected boolean match(Token source, Token target) { 82 return source.prefixOf(target); 83 } 84 }; 85 table.calculate(); 86 int[] result = new int[target.size()]; 87 LevenshteinDistance.EditOperation[] ops = table.getTargetOperations(); 88 for (int i = 0; i < result.length; ++i) { 89 if (ops[i].getType() == LevenshteinDistance.EDIT_UNCHANGED) { 90 result[i] = ops[i].getPosition(); 91 } else { 92 result[i] = -1; 93 } 94 } 95 return result; 96 } 97 98 @VisibleForTesting 99 List<Token> tokenize(CharSequence seq) { 100 int pos = 0; 101 ArrayList<Token> tokens = new ArrayList<Token>(); 102 while (pos < seq.length()) { 103 while (pos < seq.length() && Character.isWhitespace(seq.charAt(pos))) { 104 pos++; 105 } 106 int start = pos; 107 while (pos < seq.length() && !Character.isWhitespace(seq.charAt(pos))) { 108 pos++; 109 } 110 int end = pos; 111 if (start != end) { 112 Token t = new Token(seq, start, end); 113 tokens.add(t); 114 } 115 } 116 return tokens; 117 } 118 119 @VisibleForTesting 120 static class Token { 121 private final CharSequence mContainer; 122 private final int mStart; 123 private final int mEnd; 124 125 public Token(CharSequence container, int start, int end) { 126 mContainer = container; 127 mStart = start; 128 mEnd = end; 129 } 130 131 public int getStart() { 132 return mStart; 133 } 134 135 public int getEnd() { 136 return mEnd; 137 } 138 139 public int getLength() { 140 return mEnd - mStart; 141 } 142 143 @Override 144 public String toString() { 145 return getSequence().toString(); 146 } 147 148 public CharSequence getSequence() { 149 return mContainer.subSequence(mStart, mEnd); 150 } 151 152 public boolean prefixOf(Token that) { 153 return that.toString().startsWith(toString()); 154 } 155 156 } 157 158} 159