/**
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
15ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard */ 16ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard 17ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalardpackage com.android.inputmethod.latin.dicttool; 18ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard 1947cac57e4593f47e753410e4199e84e458d6de6fJean Chalardimport com.android.inputmethod.latin.makedict.FormatSpec; 20f1d35ac5dc0cca2b357940cab1001cadca37bcb4Jean Chalardimport com.android.inputmethod.latin.makedict.FusionDictionary; 21576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanadaimport com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; 22516f86815ddec465e3d3ff59540d26913b05236fKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.WeightedString; 235f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.WordProperty; 24f1d35ac5dc0cca2b357940cab1001cadca37bcb4Jean Chalard 2551a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalardimport java.util.Arrays; 2674d66a5513c12539459dc872e7cca4521e908f1bJean Chalardimport java.util.ArrayList; 2774d66a5513c12539459dc872e7cca4521e908f1bJean Chalard 28ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalardpublic class Info extends Dicttool.Command { 2977c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard public static final String COMMAND = "info"; 3077c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard 31ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard public Info() { 32ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard } 33ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard 3447cac57e4593f47e753410e4199e84e458d6de6fJean Chalard @Override 35ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard public String getHelp() { 36a8058d169dad450eca428ca76c5a0f44e45f41a7Jean Chalard return COMMAND + " <filename>: prints various information about a dictionary file"; 37ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard } 38ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard 
3951a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard private static void showInfo(final FusionDictionary dict, final boolean plumbing) { 4047cac57e4593f47e753410e4199e84e458d6de6fJean Chalard System.out.println("Header attributes :"); 4151a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard System.out.print(dict.mOptions.toString(2, plumbing)); 4247cac57e4593f47e753410e4199e84e458d6de6fJean Chalard int wordCount = 0; 4347cac57e4593f47e753410e4199e84e458d6de6fJean Chalard int bigramCount = 0; 4447cac57e4593f47e753410e4199e84e458d6de6fJean Chalard int shortcutCount = 0; 4547cac57e4593f47e753410e4199e84e458d6de6fJean Chalard int whitelistCount = 0; 465f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi for (final WordProperty wordProperty : dict) { 4747cac57e4593f47e753410e4199e84e458d6de6fJean Chalard ++wordCount; 485f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi if (null != wordProperty.mBigrams) { 495f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi bigramCount += wordProperty.mBigrams.size(); 5047cac57e4593f47e753410e4199e84e458d6de6fJean Chalard } 515f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi if (null != wordProperty.mShortcutTargets) { 525f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi shortcutCount += wordProperty.mShortcutTargets.size(); 535f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 54df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY 55df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi == shortcutTarget.getProbability()) { 5647cac57e4593f47e753410e4199e84e458d6de6fJean Chalard ++whitelistCount; 5747cac57e4593f47e753410e4199e84e458d6de6fJean Chalard } 5847cac57e4593f47e753410e4199e84e458d6de6fJean Chalard } 5947cac57e4593f47e753410e4199e84e458d6de6fJean Chalard } 6047cac57e4593f47e753410e4199e84e458d6de6fJean Chalard } 6147cac57e4593f47e753410e4199e84e458d6de6fJean 
Chalard System.out.println("Words in the dictionary : " + wordCount); 6247cac57e4593f47e753410e4199e84e458d6de6fJean Chalard System.out.println("Bigram count : " + bigramCount); 6347cac57e4593f47e753410e4199e84e458d6de6fJean Chalard System.out.println("Shortcuts : " + shortcutCount + " (out of which " + whitelistCount 6447cac57e4593f47e753410e4199e84e458d6de6fJean Chalard + " whitelist entries)"); 6547cac57e4593f47e753410e4199e84e458d6de6fJean Chalard } 6647cac57e4593f47e753410e4199e84e458d6de6fJean Chalard 6751a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard private static void showWordInfo(final FusionDictionary dict, final String word, 6851a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard final boolean plumbing) { 69576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); 70576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada if (null == ptNode) { 7174d66a5513c12539459dc872e7cca4521e908f1bJean Chalard System.out.println(word + " is not in the dictionary"); 7274d66a5513c12539459dc872e7cca4521e908f1bJean Chalard return; 7374d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 7474d66a5513c12539459dc872e7cca4521e908f1bJean Chalard System.out.println("Word: " + word); 758ffc631826b108423f98e3ff4d987f067cbc4e0cKeisuke Kuroyanagi System.out.println(" Freq: " + ptNode.getProbability()); 76576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada if (ptNode.getIsNotAWord()) { 7774d66a5513c12539459dc872e7cca4521e908f1bJean Chalard System.out.println(" Is not a word"); 7874d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 79576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada if (ptNode.getIsBlacklistEntry()) { 8074d66a5513c12539459dc872e7cca4521e908f1bJean Chalard System.out.println(" Is a blacklist entry"); 8174d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 82576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada final ArrayList<WeightedString> shortcutTargets = 
ptNode.getShortcutTargets(); 8374d66a5513c12539459dc872e7cca4521e908f1bJean Chalard if (null == shortcutTargets || shortcutTargets.isEmpty()) { 8474d66a5513c12539459dc872e7cca4521e908f1bJean Chalard System.out.println(" No shortcuts"); 8574d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } else { 8674d66a5513c12539459dc872e7cca4521e908f1bJean Chalard for (final WeightedString shortcutTarget : shortcutTargets) { 8774d66a5513c12539459dc872e7cca4521e908f1bJean Chalard System.out.println(" Shortcut target: " + shortcutTarget.mWord + " (" 88df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY 89df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi == shortcutTarget.getProbability() ? 90df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi "whitelist" : shortcutTarget.getProbability()) + ")"); 9174d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 9274d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 93576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada final ArrayList<WeightedString> bigrams = ptNode.getBigrams(); 9474d66a5513c12539459dc872e7cca4521e908f1bJean Chalard if (null == bigrams || bigrams.isEmpty()) { 9574d66a5513c12539459dc872e7cca4521e908f1bJean Chalard System.out.println(" No bigrams"); 9674d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } else { 9774d66a5513c12539459dc872e7cca4521e908f1bJean Chalard for (final WeightedString bigram : bigrams) { 98df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi System.out.println( 99df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi " Bigram: " + bigram.mWord + " (" + bigram.getProbability() + ")"); 10074d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 10174d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 10274d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 10374d66a5513c12539459dc872e7cca4521e908f1bJean Chalard 10447cac57e4593f47e753410e4199e84e458d6de6fJean Chalard @Override 
105ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard public void run() { 10677c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard if (mArgs.length < 1) { 10777c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard throw new RuntimeException("Not enough arguments for command " + COMMAND); 10877c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard } 10951a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard final boolean plumbing; 11051a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard if ("-p".equals(mArgs[0])) { 11151a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard plumbing = true; 11251a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length); 11351a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard if (mArgs.length != 1) { // There should be only 1 argument left 11451a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard throw new RuntimeException("Wrong number of arguments for command " + COMMAND); 11551a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard } 11651a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard } else { 11751a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard plumbing = false; 11851a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard } 119f1d35ac5dc0cca2b357940cab1001cadca37bcb4Jean Chalard final String filename = mArgs[0]; 12074d66a5513c12539459dc872e7cca4521e908f1bJean Chalard final boolean hasWordArguments = (1 == mArgs.length); 1216ecc50a867dc09eb1d9dafe62f40e73de01b30cbJean Chalard final FusionDictionary dict = BinaryDictOffdeviceUtils.getDictionary(filename, 12274d66a5513c12539459dc872e7cca4521e908f1bJean Chalard hasWordArguments /* report */); 12374d66a5513c12539459dc872e7cca4521e908f1bJean Chalard if (hasWordArguments) { 12451a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard showInfo(dict, plumbing); 12574d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } else { 12674d66a5513c12539459dc872e7cca4521e908f1bJean Chalard for (int i = 1; i < mArgs.length; ++i) { 12751a0ef8c59ea590b6e5e80a82fc75bf244084270Jean 
Chalard showWordInfo(dict, mArgs[i], plumbing); 12874d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 12974d66a5513c12539459dc872e7cca4521e908f1bJean Chalard } 130ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard } 131ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard} 132