Diff.java revision a91561aa58db1c43092c1caecc051a11fa5391c7
1/**
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.dicttool;
18
19import com.android.inputmethod.latin.makedict.FusionDictionary;
20import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
21import com.android.inputmethod.latin.makedict.WeightedString;
22import com.android.inputmethod.latin.makedict.WordProperty;
23
24import java.util.Arrays;
25import java.util.ArrayList;
26import java.util.HashMap;
27
28public class Diff extends Dicttool.Command {
29    public static final String COMMAND = "diff";
30
31    public Diff() {
32    }
33
34    @Override
35    public String getHelp() {
36        return COMMAND + " [-p] <dict> <dict> : shows differences between two dictionaries.\n"
37                + "  If -p (plumbing) option is given, produce output suitable for a script";
38    }
39
40    @Override
41    public void run() {
42        if (mArgs.length < 2) {
43            throw new RuntimeException("Not enough arguments for command " + COMMAND);
44        }
45        final boolean plumbing;
46        if ("-p".equals(mArgs[0])) {
47            plumbing = true;
48            mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
49            if (mArgs.length != 2) { // There should be only 2 arguments left
50                throw new RuntimeException("Wrong number of arguments for command " + COMMAND);
51            }
52        } else {
53            plumbing = false;
54        }
55        final FusionDictionary dict0 =
56                BinaryDictOffdeviceUtils.getDictionary(mArgs[0], false /* report */);
57        if (null == dict0) throw new RuntimeException("Can't read dictionary " + mArgs[0]);
58        final FusionDictionary dict1 =
59                BinaryDictOffdeviceUtils.getDictionary(mArgs[1], false /* report */);
60        if (null == dict1) throw new RuntimeException("Can't read dictionary " + mArgs[1]);
61        if (!plumbing) {
62            System.out.println("Header :");
63            diffHeaders(dict0, dict1);
64            if (languageDiffers(dict0, dict1)) {
65                // We only check for the language here. The rationale is that one may meaningfully
66                // diff a en_US with a en_GB dictionary, but someone who diffs a de dict with a
67                // pt_BR dict is almost certainly only interested in header-level diff, and the word
68                // diff would be very large, meaningless, and annoying.
69                return;
70            }
71            System.out.println("Body :");
72        }
73        diffWords(dict0, dict1);
74    }
75
76    private static boolean languageDiffers(final FusionDictionary dict0,
77            final FusionDictionary dict1) {
78        // If either of the dictionaries have no locale, assume it's okay
79        if (null == dict0.mOptions.mAttributes.get("locale")) return false;
80        if (null == dict1.mOptions.mAttributes.get("locale")) return false;
81        final String dict0Lang = dict0.mOptions.mAttributes.get("locale").split("_", 3)[0];
82        final String dict1Lang = dict1.mOptions.mAttributes.get("locale").split("_", 3)[0];
83        return !dict0Lang.equals(dict1Lang);
84    }
85
86    private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) {
87        boolean hasDifferences = false;
88        final HashMap<String, String> options1 = new HashMap<>(dict1.mOptions.mAttributes);
89        for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
90            if (!dict0.mOptions.mAttributes.get(optionKey).equals(
91                    dict1.mOptions.mAttributes.get(optionKey))) {
92                System.out.println("  " + optionKey + " : "
93                        + dict0.mOptions.mAttributes.get(optionKey) + " <=> "
94                        + dict1.mOptions.mAttributes.get(optionKey));
95                hasDifferences = true;
96            }
97            options1.remove(optionKey);
98        }
99        for (final String optionKey : options1.keySet()) {
100            System.out.println("  " + optionKey + " : null <=> " + options1.get(optionKey));
101            hasDifferences = true;
102        }
103        if (!hasDifferences) {
104            System.out.println("  No differences");
105        }
106    }
107
108    private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
109        boolean hasDifferences = false;
110        for (final WordProperty word0Property : dict0) {
111            final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
112                    word0Property.mWord);
113            if (null == word1PtNode) {
114                // This word is not in dict1
115                System.out.println("Deleted: " + word0Property.mWord + " "
116                        + word0Property.getProbability());
117                hasDifferences = true;
118            } else {
119                // We found the word. Compare frequencies, shortcuts, bigrams
120                if (word0Property.getProbability() != word1PtNode.getProbability()) {
121                    System.out.println("Probability changed: " + word0Property.mWord + " "
122                            + word0Property.getProbability() + " -> "
123                            + word1PtNode.getProbability());
124                    hasDifferences = true;
125                }
126                if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) {
127                    System.out.println("Not a word: " + word0Property.mWord + " "
128                            + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
129                    hasDifferences = true;
130                }
131                if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) {
132                    System.out.println("Blacklist: " + word0Property.mWord + " "
133                            + word0Property.mIsBlacklistEntry + " -> "
134                            + word1PtNode.getIsBlacklistEntry());
135                    hasDifferences = true;
136                }
137                hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
138                        "Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
139                hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
140                        "Shortcut", word0Property.mShortcutTargets,
141                        word1PtNode.getShortcutTargets());
142            }
143        }
144        for (final WordProperty word1Property : dict1) {
145            final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
146                    word1Property.mWord);
147            if (null == word0PtNode) {
148                // This word is not in dict0
149                System.out.println("Added: " + word1Property.mWord + " "
150                        + word1Property.getProbability());
151                hasDifferences = true;
152            }
153        }
154        if (!hasDifferences) {
155            System.out.println("  No differences");
156        }
157    }
158
159    private static boolean hasAttributesDifferencesAndPrintThemIfAny(final String word,
160            final String type, final ArrayList<WeightedString> list0,
161            final ArrayList<WeightedString> list1) {
162        if (null == list1) {
163            if (null == list0) return false;
164            for (final WeightedString attribute0 : list0) {
165                System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
166                        + attribute0.getProbability());
167            }
168            return true;
169        }
170        boolean hasDifferences = false;
171        if (null != list0) {
172            for (final WeightedString attribute0 : list0) {
173                // The following tests with #equals(). The WeightedString#equals() method returns
174                // true if both the string and the frequency are the same.
175                if (!list1.contains(attribute0)) {
176                    hasDifferences = true;
177                    // Search for a word with the same string but a different frequency
178                    boolean foundString = false;
179                    for (final WeightedString attribute1 : list1) {
180                        if (attribute0.mWord.equals(attribute1.mWord)) {
181                            System.out.println(type + " freq changed: " + word + " "
182                                    + attribute0.mWord + " " + attribute0.getProbability() + " -> "
183                                    + attribute1.getProbability());
184                            list1.remove(attribute1);
185                            foundString = true;
186                            break;
187                        }
188                    }
189                    if (!foundString) {
190                        // We come here if we haven't found any matching string.
191                        System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
192                                + attribute0.getProbability());
193                    }
194                } else {
195                    list1.remove(attribute0);
196                }
197            }
198        }
199        // We removed any matching word that we found, so now list1 only contains words that
200        // are not included in list0.
201        for (final WeightedString attribute1 : list1) {
202            hasDifferences = true;
203            System.out.println(type + " added: " + word + " " + attribute1.mWord + " "
204                    + attribute1.getProbability());
205        }
206        return hasDifferences;
207    }
208}
209