LevenshteinDistance.java revision 2fb3a129925a42e72944b836e85a1a2d55a0d950
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.quicksearchbox.util;
18
19import java.util.List;
20
21/**
22 * This class represents the matrix used in the Levenshtein distance algorithm, together
23 * with the algorithm itself which operates on the matrix.
24 *
25 * We also track of the individual operations applied to transform the source string into the
26 * target string so we can trace the path taken through the matrix afterwards, in order to
27 * perform the formatting as required.
28 */
29public abstract class LevenshteinDistance<T> {
30    public static final int EDIT_DELETE = 0;
31    public static final int EDIT_INSERT = 1;
32    public static final int EDIT_REPLACE = 2;
33    public static final int EDIT_UNCHANGED = 3;
34
35    private final List<T> mSource;
36    private final List<T> mTarget;
37    private final Entry[][] mTable;
38
39    public LevenshteinDistance(List<T> source, List<T> target) {
40        mTable = new Entry[source.size()+1][target.size()+1];
41        mSource = source;
42        mTarget = target;
43        set(0, 0, EDIT_UNCHANGED, 0);
44        for (int i = 1; i <= source.size(); ++i) {
45            set(i, 0, EDIT_DELETE, i);
46        }
47        for (int i = 1; i <= target.size(); ++i) {
48            set(0, i, EDIT_INSERT, i);
49        }
50    }
51
52    /**
53     * Compares a source and target token.
54     * @param source Token from the source string.
55     * @param target Token from the target string.
56     * @return {@code true} if the two are the same for the purposes of edit distance calculation,
57     *      {@code false} otherwise.
58     */
59    protected abstract boolean match(T source, T target);
60
61    /**
62     * Implementation of Levenshtein distance algorithm.
63     *
64     * @return The Levenshtein distance.
65     */
66    public int calculate() {
67        for (int s = 1; s <= mSource.size(); ++s) {
68            T sourceToken = mSource.get(s-1);
69            for (int t = 1; t <= mTarget.size(); ++t) {
70                T targetToken = mTarget.get(t-1);
71                int cost = match(sourceToken, targetToken) ? 0 : 1;
72
73                Entry e = get(s - 1, t);
74                int distance = e.getDistance() + 1;
75                int type = EDIT_DELETE;
76
77                e = get(s, t - 1);
78                if (e.getDistance() + 1 < distance ) {
79                    distance = e.getDistance() + 1;
80                    type = EDIT_INSERT;
81                }
82
83                e = get(s - 1, t - 1);
84                if (e.getDistance() + cost < distance) {
85                    distance = e.getDistance() + cost;
86                    type = cost == 0 ? EDIT_UNCHANGED : EDIT_REPLACE;
87                }
88                set(s, t, type, distance);
89            }
90        }
91        return get(mSource.size(), mTarget.size()).getDistance();
92    }
93
94    /**
95     * Gets the list of operations which were applied to each target token; {@link #calculate} must
96     * have been called on this object before using this method.
97     * @return A list of {@link EditOperation}s indicating the origin of each token in the target
98     *      string. The position of the token indicates the position in the source string of the
99     *      token that was unchanged/replaced, or the position in the source after which a target
100     *      token was inserted.
101     */
102    public EditOperation[] getTargetOperations() {
103        EditOperation[] ops = new EditOperation[mTarget.size()];
104        int targetPos = mTarget.size();
105        int sourcePos = mSource.size();
106        while (targetPos > 0) {
107            Entry e = get(sourcePos, targetPos);
108            int editType = e.getEditType();
109            switch (editType) {
110                case LevenshteinDistance.EDIT_DELETE:
111                    sourcePos--;
112                    break;
113                case LevenshteinDistance.EDIT_INSERT:
114                    targetPos--;
115                    ops[targetPos] = new EditOperation(editType, sourcePos);
116                    break;
117                case LevenshteinDistance.EDIT_UNCHANGED:
118                case LevenshteinDistance.EDIT_REPLACE:
119                    targetPos--;
120                    sourcePos--;
121                    ops[targetPos] = new EditOperation(editType, sourcePos);
122                    break;
123            }
124        }
125
126        return ops;
127    }
128
129    private void set(int sourceIdx, int targetIdx, int editType, int distance) {
130        mTable[sourceIdx][targetIdx] = new Entry(editType, distance);
131    }
132
133    private Entry get(int sourceIdx, int targetIdx) {
134        Entry e = mTable[sourceIdx][targetIdx];
135        if (e == null) {
136            throw new NullPointerException("No entry at " + sourceIdx + "," + targetIdx +
137                    "; size: " + (mSource.size() + 1) + "," + (mTarget.size() + 1));
138        }
139        return e;
140    }
141
142    private static class Entry {
143        private final int mEditType;
144        private final int mDistance;
145        public Entry(int editType, int distance) {
146            mEditType = editType;
147            mDistance = distance;
148        }
149        public int getDistance() {
150            return mDistance;
151        }
152        public int getEditType() {
153            return mEditType;
154        }
155    }
156
157    public static class EditOperation {
158        private final int mType;
159        private final int mPosition;
160        public EditOperation(int type, int position) {
161            mType = type;
162            mPosition = position;
163        }
164        public int getType() {
165            return mType;
166        }
167        public int getPosition() {
168            return mPosition;
169        }
170    }
171
172}
173