/**
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard
17ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalardpackage com.android.inputmethod.latin.dicttool;
18ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard
1947cac57e4593f47e753410e4199e84e458d6de6fJean Chalardimport com.android.inputmethod.latin.makedict.FormatSpec;
20f1d35ac5dc0cca2b357940cab1001cadca37bcb4Jean Chalardimport com.android.inputmethod.latin.makedict.FusionDictionary;
21576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanadaimport com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
22516f86815ddec465e3d3ff59540d26913b05236fKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.WeightedString;
235f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.WordProperty;
24f1d35ac5dc0cca2b357940cab1001cadca37bcb4Jean Chalard
2551a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalardimport java.util.Arrays;
2674d66a5513c12539459dc872e7cca4521e908f1bJean Chalardimport java.util.ArrayList;
2774d66a5513c12539459dc872e7cca4521e908f1bJean Chalard
28ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalardpublic class Info extends Dicttool.Command {
2977c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard    public static final String COMMAND = "info";
3077c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard
31ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard    public Info() {
32ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard    }
33ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard
3447cac57e4593f47e753410e4199e84e458d6de6fJean Chalard    @Override
35ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard    public String getHelp() {
36a8058d169dad450eca428ca76c5a0f44e45f41a7Jean Chalard        return COMMAND + " <filename>: prints various information about a dictionary file";
37ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard    }
38ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard
3951a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard    private static void showInfo(final FusionDictionary dict, final boolean plumbing) {
4047cac57e4593f47e753410e4199e84e458d6de6fJean Chalard        System.out.println("Header attributes :");
4151a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard        System.out.print(dict.mOptions.toString(2, plumbing));
4247cac57e4593f47e753410e4199e84e458d6de6fJean Chalard        int wordCount = 0;
4347cac57e4593f47e753410e4199e84e458d6de6fJean Chalard        int bigramCount = 0;
4447cac57e4593f47e753410e4199e84e458d6de6fJean Chalard        int shortcutCount = 0;
4547cac57e4593f47e753410e4199e84e458d6de6fJean Chalard        int whitelistCount = 0;
465f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi        for (final WordProperty wordProperty : dict) {
4747cac57e4593f47e753410e4199e84e458d6de6fJean Chalard            ++wordCount;
485f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            if (null != wordProperty.mBigrams) {
495f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi                bigramCount += wordProperty.mBigrams.size();
5047cac57e4593f47e753410e4199e84e458d6de6fJean Chalard            }
515f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            if (null != wordProperty.mShortcutTargets) {
525f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi                shortcutCount += wordProperty.mShortcutTargets.size();
535f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi                for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
54df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi                    if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
55df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi                            == shortcutTarget.getProbability()) {
5647cac57e4593f47e753410e4199e84e458d6de6fJean Chalard                        ++whitelistCount;
5747cac57e4593f47e753410e4199e84e458d6de6fJean Chalard                    }
5847cac57e4593f47e753410e4199e84e458d6de6fJean Chalard                }
5947cac57e4593f47e753410e4199e84e458d6de6fJean Chalard            }
6047cac57e4593f47e753410e4199e84e458d6de6fJean Chalard        }
6147cac57e4593f47e753410e4199e84e458d6de6fJean Chalard        System.out.println("Words in the dictionary : " + wordCount);
6247cac57e4593f47e753410e4199e84e458d6de6fJean Chalard        System.out.println("Bigram count : " + bigramCount);
6347cac57e4593f47e753410e4199e84e458d6de6fJean Chalard        System.out.println("Shortcuts : " + shortcutCount + " (out of which " + whitelistCount
6447cac57e4593f47e753410e4199e84e458d6de6fJean Chalard                + " whitelist entries)");
6547cac57e4593f47e753410e4199e84e458d6de6fJean Chalard    }
6647cac57e4593f47e753410e4199e84e458d6de6fJean Chalard
6751a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard    private static void showWordInfo(final FusionDictionary dict, final String word,
6851a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard            final boolean plumbing) {
69576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada        final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
70576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada        if (null == ptNode) {
7174d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            System.out.println(word + " is not in the dictionary");
7274d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            return;
7374d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        }
7474d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        System.out.println("Word: " + word);
758ffc631826b108423f98e3ff4d987f067cbc4e0cKeisuke Kuroyanagi        System.out.println("  Freq: " + ptNode.getProbability());
76576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada        if (ptNode.getIsNotAWord()) {
7774d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            System.out.println("  Is not a word");
7874d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        }
79576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada        if (ptNode.getIsBlacklistEntry()) {
8074d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            System.out.println("  Is a blacklist entry");
8174d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        }
82576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada        final ArrayList<WeightedString> shortcutTargets = ptNode.getShortcutTargets();
8374d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        if (null == shortcutTargets || shortcutTargets.isEmpty()) {
8474d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            System.out.println("  No shortcuts");
8574d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        } else {
8674d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            for (final WeightedString shortcutTarget : shortcutTargets) {
8774d66a5513c12539459dc872e7cca4521e908f1bJean Chalard                System.out.println("  Shortcut target: " + shortcutTarget.mWord + " ("
88df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi                        + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
89df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi                                == shortcutTarget.getProbability() ?
90df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi                                        "whitelist" : shortcutTarget.getProbability()) + ")");
9174d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            }
9274d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        }
93576f625ee1b22e26baab46cc4ad3138e901383e2Yuichiro Hanada        final ArrayList<WeightedString> bigrams = ptNode.getBigrams();
9474d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        if (null == bigrams || bigrams.isEmpty()) {
9574d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            System.out.println("  No bigrams");
9674d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        } else {
9774d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            for (final WeightedString bigram : bigrams) {
98df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi                System.out.println(
99df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi                        "  Bigram: " + bigram.mWord + " (" + bigram.getProbability() + ")");
10074d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            }
10174d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        }
10274d66a5513c12539459dc872e7cca4521e908f1bJean Chalard    }
10374d66a5513c12539459dc872e7cca4521e908f1bJean Chalard
10447cac57e4593f47e753410e4199e84e458d6de6fJean Chalard    @Override
105ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard    public void run() {
10677c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard        if (mArgs.length < 1) {
10777c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard            throw new RuntimeException("Not enough arguments for command " + COMMAND);
10877c8c738374c5f63cad0ef015904d37b7591a203Jean Chalard        }
10951a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard        final boolean plumbing;
11051a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard        if ("-p".equals(mArgs[0])) {
11151a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard            plumbing = true;
11251a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard            mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
11351a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard            if (mArgs.length != 1) { // There should be only 1 argument left
11451a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard                throw new RuntimeException("Wrong number of arguments for command " + COMMAND);
11551a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard            }
11651a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard        } else {
11751a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard            plumbing = false;
11851a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard        }
119f1d35ac5dc0cca2b357940cab1001cadca37bcb4Jean Chalard        final String filename = mArgs[0];
12074d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        final boolean hasWordArguments = (1 == mArgs.length);
1216ecc50a867dc09eb1d9dafe62f40e73de01b30cbJean Chalard        final FusionDictionary dict = BinaryDictOffdeviceUtils.getDictionary(filename,
12274d66a5513c12539459dc872e7cca4521e908f1bJean Chalard                hasWordArguments /* report */);
12374d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        if (hasWordArguments) {
12451a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard            showInfo(dict, plumbing);
12574d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        } else {
12674d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            for (int i = 1; i < mArgs.length; ++i) {
12751a0ef8c59ea590b6e5e80a82fc75bf244084270Jean Chalard                showWordInfo(dict, mArgs[i], plumbing);
12874d66a5513c12539459dc872e7cca4521e908f1bJean Chalard            }
12974d66a5513c12539459dc872e7cca4521e908f1bJean Chalard        }
130ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard    }
131ddcb4847df9c9c04e1c8a118f99655a4a93cda40Jean Chalard}
132