LevenshteinSuggestionFormatter.java revision 2fb3a129925a42e72944b836e85a1a2d55a0d950
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.quicksearchbox;
18
19import com.android.quicksearchbox.util.LevenshteinDistance;
20import com.google.common.annotations.VisibleForTesting;
21
22import android.text.SpannableString;
23import android.text.Spanned;
24import android.util.Log;
25
26import java.util.ArrayList;
27import java.util.List;
28
29/**
30 * Suggestion formatter using the Levenshtein distance (minumum edit distance) to calculate the
31 * formatting.
32 */
33public class LevenshteinSuggestionFormatter extends SuggestionFormatter {
34    private static final boolean DBG = false;
35    private static final String TAG = "QSB.LevenshteinSuggestionFormatter";
36
37    @Override
38    public Spanned formatSuggestion(CharSequence query, CharSequence suggestion) {
39        if (DBG) Log.d(TAG, "formatSuggestion('" + query + "', '" + suggestion + "')");
40        query = normalizeQuery(query);
41        List<Token> queryTokens = tokenize(query);
42        List<Token> suggestionTokens = tokenize(suggestion);
43        int[] matches = findMatches(queryTokens, suggestionTokens);
44        if (DBG){
45            Log.d(TAG, "source = " + queryTokens);
46            Log.d(TAG, "target = " + suggestionTokens);
47            Log.d(TAG, "matches = " + matches);
48        }
49        SpannableString str = new SpannableString(suggestion);
50
51        for (int i = 0; i < matches.length; ++i) {
52            Token t = suggestionTokens.get(i);
53            int sourceLen = 0;
54            if (matches[i] >= 0) {
55                sourceLen = queryTokens.get(matches[i]).getLength();
56            }
57            applySuggestedTextStyle(str, t.getStart() + sourceLen, t.getEnd());
58            applyQueryTextStyle(str, t.getStart(), t.getStart() + sourceLen);
59        }
60
61        return str;
62    }
63
64    private CharSequence normalizeQuery(CharSequence query) {
65        return query.toString().toLowerCase();
66    }
67
68    /**
69     * Finds which tokens in the target match tokens in the source.
70     *
71     * @param source List of source tokens (i.e. user query)
72     * @param target List of target tokens (i.e. suggestion)
73     * @return The indices into source which target tokens correspond to. A non-negative value n at
74     *      position i means that target token i matches source token n. A negative value means that
75     *      the target token i does not match any source token.
76     */
77    @VisibleForTesting
78    int[] findMatches(List<Token> source, List<Token> target) {
79        LevenshteinDistance<Token> table = new LevenshteinDistance<Token>(source, target) {
80            @Override
81            protected boolean match(Token source, Token target) {
82                return source.prefixOf(target);
83            }
84        };
85        table.calculate();
86        int[] result = new int[target.size()];
87        LevenshteinDistance.EditOperation[] ops = table.getTargetOperations();
88        for (int i = 0; i < result.length; ++i) {
89            if (ops[i].getType() == LevenshteinDistance.EDIT_UNCHANGED) {
90                result[i] = ops[i].getPosition();
91            } else {
92                result[i] = -1;
93            }
94        }
95        return result;
96    }
97
98    @VisibleForTesting
99    List<Token> tokenize(CharSequence seq) {
100        int pos = 0;
101        ArrayList<Token> tokens = new ArrayList<Token>();
102        while (pos < seq.length()) {
103            while (pos < seq.length() && Character.isWhitespace(seq.charAt(pos))) {
104                pos++;
105            }
106            int start = pos;
107            while (pos < seq.length() && !Character.isWhitespace(seq.charAt(pos))) {
108                pos++;
109            }
110            int end = pos;
111            if (start != end) {
112                Token t = new Token(seq, start, end);
113                tokens.add(t);
114            }
115        }
116        return tokens;
117    }
118
119    @VisibleForTesting
120    static class Token {
121        private final CharSequence mContainer;
122        private final int mStart;
123        private final int mEnd;
124
125        public Token(CharSequence container, int start, int end) {
126            mContainer = container;
127            mStart = start;
128            mEnd = end;
129        }
130
131        public int getStart() {
132            return mStart;
133        }
134
135        public int getEnd() {
136            return mEnd;
137        }
138
139        public int getLength() {
140            return mEnd - mStart;
141        }
142
143        @Override
144        public String toString() {
145            return getSequence().toString();
146        }
147
148        public CharSequence getSequence() {
149            return mContainer.subSequence(mStart, mEnd);
150        }
151
152        public boolean prefixOf(Token that) {
153            return that.toString().startsWith(toString());
154        }
155
156    }
157
158}
159