Diff.java revision a91561aa58db1c43092c1caecc051a11fa5391c7
/**
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin.dicttool;

import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.WeightedString;
import com.android.inputmethod.latin.makedict.WordProperty;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Objects;

/**
 * Dicttool command that prints the differences between two dictionaries.
 *
 * Without options, the header attributes are compared first, then the words. With the
 * -p (plumbing) option only the word-level differences are printed, without the
 * "Header :" / "Body :" section labels.
 */
public class Diff extends Dicttool.Command {
    public static final String COMMAND = "diff";

    public Diff() {
    }

    /**
     * Returns the usage string for this command.
     */
    @Override
    public String getHelp() {
        return COMMAND + " [-p] <dict> <dict> : shows differences between two dictionaries.\n"
                + " If -p (plumbing) option is given, produce output suitable for a script";
    }

    /**
     * Reads the two dictionaries named on the command line and prints their differences.
     *
     * @throws RuntimeException if there are too few arguments, if the argument count is wrong
     *         after -p, or if either dictionary cannot be read.
     */
    @Override
    public void run() {
        if (mArgs.length < 2) {
            throw new RuntimeException("Not enough arguments for command " + COMMAND);
        }
        final boolean plumbing;
        if ("-p".equals(mArgs[0])) {
            plumbing = true;
            // Drop the option so that the dictionary names are always at indices 0 and 1.
            mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
            if (mArgs.length != 2) { // There should be only 2 arguments left
                throw new RuntimeException("Wrong number of arguments for command " + COMMAND);
            }
        } else {
            // Note: in this mode any argument after the first two is ignored.
            plumbing = false;
        }
        final FusionDictionary dict0 =
                BinaryDictOffdeviceUtils.getDictionary(mArgs[0], false /* report */);
        if (null == dict0) {
            throw new RuntimeException("Can't read dictionary " + mArgs[0]);
        }
        final FusionDictionary dict1 =
                BinaryDictOffdeviceUtils.getDictionary(mArgs[1], false /* report */);
        if (null == dict1) {
            throw new RuntimeException("Can't read dictionary " + mArgs[1]);
        }
        if (!plumbing) {
            System.out.println("Header :");
            diffHeaders(dict0, dict1);
            if (languageDiffers(dict0, dict1)) {
                // We only check for the language here. The rationale is that one may meaningfully
                // diff a en_US with a en_GB dictionary, but someone who diffs a de dict with a
                // pt_BR dict is almost certainly only interested in header-level diff, and the word
                // diff would be very large, meaningless, and annoying.
                return;
            }
            System.out.println("Body :");
        }
        diffWords(dict0, dict1);
    }

    /**
     * Tells whether the two dictionaries declare different languages.
     *
     * The language is the part of the "locale" header attribute before the first underscore.
     *
     * @return true if both dictionaries have a locale and the language parts differ; false if
     *         they match or if either dictionary has no locale.
     */
    private static boolean languageDiffers(final FusionDictionary dict0,
            final FusionDictionary dict1) {
        // If either of the dictionaries have no locale, assume it's okay
        final String locale0 = dict0.mOptions.mAttributes.get("locale");
        if (null == locale0) {
            return false;
        }
        final String locale1 = dict1.mOptions.mAttributes.get("locale");
        if (null == locale1) {
            return false;
        }
        final String dict0Lang = locale0.split("_", 3)[0];
        final String dict1Lang = locale1.split("_", 3)[0];
        return !dict0Lang.equals(dict1Lang);
    }

    /**
     * Prints every header attribute whose value differs between the two dictionaries, or a
     * "No differences" line when all attributes match.
     */
    private static void diffHeaders(final FusionDictionary dict0, final FusionDictionary dict1) {
        boolean hasDifferences = false;
        // Copy of dict1's attributes; whatever is left after the first loop exists only in dict1.
        final HashMap<String, String> options1 = new HashMap<>(dict1.mOptions.mAttributes);
        for (final String optionKey : dict0.mOptions.mAttributes.keySet()) {
            final String value0 = dict0.mOptions.mAttributes.get(optionKey);
            final String value1 = dict1.mOptions.mAttributes.get(optionKey);
            // Objects.equals() is null-safe, so a null value in dict0 is reported as a
            // difference instead of throwing a NullPointerException.
            if (!Objects.equals(value0, value1)) {
                System.out.println(" " + optionKey + " : " + value0 + " <=> " + value1);
                hasDifferences = true;
            }
            options1.remove(optionKey);
        }
        for (final String optionKey : options1.keySet()) {
            System.out.println(" " + optionKey + " : null <=> " + options1.get(optionKey));
            hasDifferences = true;
        }
        if (!hasDifferences) {
            System.out.println(" No differences");
        }
    }

    /**
     * Prints the word-level differences between the two dictionaries: deleted and added words,
     * and for words present in both, changes in probability, not-a-word flag, blacklist flag,
     * bigrams and shortcuts. Prints a "No differences" line when nothing differs.
     */
    private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
        boolean hasDifferences = false;
        // First pass: words of dict0, looked up in dict1.
        for (final WordProperty word0Property : dict0) {
            final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
                    word0Property.mWord);
            if (null == word1PtNode) {
                // This word is not in dict1
                System.out.println("Deleted: " + word0Property.mWord + " "
                        + word0Property.getProbability());
                hasDifferences = true;
                continue;
            }
            // We found the word. Compare frequencies, shortcuts, bigrams
            if (word0Property.getProbability() != word1PtNode.getProbability()) {
                System.out.println("Probability changed: " + word0Property.mWord + " "
                        + word0Property.getProbability() + " -> "
                        + word1PtNode.getProbability());
                hasDifferences = true;
            }
            if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) {
                System.out.println("Not a word: " + word0Property.mWord + " "
                        + word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
                hasDifferences = true;
            }
            if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) {
                System.out.println("Blacklist: " + word0Property.mWord + " "
                        + word0Property.mIsBlacklistEntry + " -> "
                        + word1PtNode.getIsBlacklistEntry());
                hasDifferences = true;
            }
            hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
                    "Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
            hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
                    "Shortcut", word0Property.mShortcutTargets,
                    word1PtNode.getShortcutTargets());
        }
        // Second pass: words of dict1 that dict0 does not contain.
        for (final WordProperty word1Property : dict1) {
            final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
                    word1Property.mWord);
            if (null == word0PtNode) {
                // This word is not in dict0
                System.out.println("Added: " + word1Property.mWord + " "
                        + word1Property.getProbability());
                hasDifferences = true;
            }
        }
        if (!hasDifferences) {
            System.out.println(" No differences");
        }
    }

    /**
     * Compares two attribute lists (bigrams or shortcuts) of a word and prints each removed,
     * added or frequency-changed attribute.
     *
     * Neither list is modified by this method.
     *
     * @param word the word the attributes belong to; only used in the printed lines.
     * @param type label used as a prefix in the printed lines, e.g. "Bigram" or "Shortcut".
     * @param list0 attributes of the word in the first dictionary; may be null.
     * @param list1 attributes of the word in the second dictionary; may be null.
     * @return true if at least one difference was printed, false otherwise.
     */
    private static boolean hasAttributesDifferencesAndPrintThemIfAny(final String word,
            final String type, final ArrayList<WeightedString> list0,
            final ArrayList<WeightedString> list1) {
        if (null == list1) {
            if (null == list0) {
                return false;
            }
            for (final WeightedString attribute0 : list0) {
                System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
                        + attribute0.getProbability());
            }
            // An empty list0 against a missing list1 is not a difference: nothing was printed.
            return !list0.isEmpty();
        }
        // Work on a private copy so that the caller's list is left untouched.
        final ArrayList<WeightedString> remaining = new ArrayList<>(list1);
        boolean hasDifferences = false;
        if (null != list0) {
            for (final WeightedString attribute0 : list0) {
                // The following tests with #equals(). The WeightedString#equals() method returns
                // true if both the string and the frequency are the same.
                if (remaining.remove(attribute0)) {
                    // Exact match: consumed from the remaining attributes, nothing to report.
                    continue;
                }
                hasDifferences = true;
                // Search for a word with the same string but a different frequency
                boolean foundString = false;
                final Iterator<WeightedString> candidates = remaining.iterator();
                while (candidates.hasNext()) {
                    final WeightedString attribute1 = candidates.next();
                    if (attribute0.mWord.equals(attribute1.mWord)) {
                        System.out.println(type + " freq changed: " + word + " "
                                + attribute0.mWord + " " + attribute0.getProbability() + " -> "
                                + attribute1.getProbability());
                        candidates.remove();
                        foundString = true;
                        break;
                    }
                }
                if (!foundString) {
                    // We come here if we haven't found any matching string.
                    System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
                            + attribute0.getProbability());
                }
            }
        }
        // We removed any matching word that we found, so now the remaining attributes are only
        // those that are not included in list0.
        for (final WeightedString attribute1 : remaining) {
            hasDifferences = true;
            System.out.println(type + " added: " + word + " " + attribute1.mWord + " "
                    + attribute1.getProbability());
        }
        return hasDifferences;
    }
}