14be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi/*
24be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * Copyright (C) 2013 The Android Open Source Project
34be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi *
44be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License");
54be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * you may not use this file except in compliance with the License.
64be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * You may obtain a copy of the License at
74be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi *
84be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi *      http://www.apache.org/licenses/LICENSE-2.0
94be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi *
104be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software
114be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS,
124be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
134be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * See the License for the specific language governing permissions and
144be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * limitations under the License.
154be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi */
164be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi
174be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagipackage com.android.inputmethod.latin;
184be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi
194be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport android.test.AndroidTestCase;
204be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport android.test.suitebuilder.annotation.LargeTest;
21bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagiimport android.text.TextUtils;
22f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagiimport android.util.Pair;
234be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi
24bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagiimport com.android.inputmethod.latin.NgramContext.WordInfo;
2554a1b8ec54c0a001cae93af43e25a8e35de02ff1Ken Wakasaimport com.android.inputmethod.latin.common.CodePointUtils;
265b91b551e5ffaf2c2e691dfbd434f21c82293986Jean Chalardimport com.android.inputmethod.latin.common.FileUtils;
271085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.DictionaryHeader;
284be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.FormatSpec;
29516f86815ddec465e3d3ff59540d26913b05236fKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.WeightedString;
305f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.WordProperty;
31e784148ae6872942434eaa55ca32b4c6442cc8e8Keisuke Kuroyanagiimport com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
324be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi
334be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport java.io.File;
344be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport java.io.IOException;
3571cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagiimport java.util.ArrayList;
364be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport java.util.HashMap;
3731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagiimport java.util.HashSet;
384be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport java.util.Locale;
3978b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagiimport java.util.Random;
404be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi
414be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi@LargeTest
424be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagipublic class BinaryDictionaryTests extends AndroidTestCase {
434be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
444be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi    private static final String TEST_LOCALE = "test";
45ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    private static final String DICTIONARY_ID = "TestBinaryDictionary";
464be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi
47ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    private HashSet<File> mDictFilesToBeDeleted = new HashSet<>();
48ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi
49ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    @Override
50ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    protected void setUp() throws Exception {
51ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        super.setUp();
52ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        mDictFilesToBeDeleted.clear();
53ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    }
54ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi
55ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    @Override
56ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    protected void tearDown() throws Exception {
57ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        for (final File dictFile : mDictFilesToBeDeleted) {
58ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi            dictFile.delete();
59ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        }
60ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        mDictFilesToBeDeleted.clear();
61ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        super.tearDown();
62ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    }
63ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi
64ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    private File createEmptyDictionaryAndGetFile(final int formatVersion) {
651085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        return createEmptyDictionaryWithAttributesAndGetFile(formatVersion,
661085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi                new HashMap<String, String>());
671085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi    }
681085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi
691085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi    private File createEmptyDictionaryWithAttributesAndGetFile(final int formatVersion,
701085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi            final HashMap<String, String> attributeMap) {
71ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        try {
72ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi            final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion,
73ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                    attributeMap);
74ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi            mDictFilesToBeDeleted.add(dictFile);
75ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi            return dictFile;
76ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        } catch (final IOException e) {
77ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi            fail(e.toString());
782fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        }
79ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        return null;
802fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    }
812fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
821085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi    private File createEmptyVer4DictionaryAndGetFile(final int formatVersion,
831085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi            final HashMap<String, String> attributeMap) throws IOException {
84ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION,
854be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi                getContext().getCacheDir());
862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        file.delete();
872fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        file.mkdir();
88455dc84cf2c6526329b535f30000ea45b7d4d4d7Keisuke Kuroyanagi        if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
8943cf9076b2d053c554941e55f6073b8f586c510bJean Chalard                Locale.ENGLISH, attributeMap)) {
905ef6209656c51df0f0542d2a75c2df93c8d0f027Keisuke Kuroyanagi            return file;
915ef6209656c51df0f0542d2a75c2df93c8d0f027Keisuke Kuroyanagi        }
925f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka        throw new IOException("Empty dictionary " + file.getAbsolutePath()
935f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka                + " cannot be created. Format version: " + formatVersion);
944be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi    }
954be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi
965f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka    private static BinaryDictionary getBinaryDictionary(final File dictFile) {
97ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        return new BinaryDictionary(dictFile.getAbsolutePath(),
98ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
99ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
100ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    }
101ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi
102ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    private BinaryDictionary getEmptyBinaryDictionary(final int formatVersion) {
103ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        final File dictFile = createEmptyDictionaryAndGetFile(formatVersion);
104ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        return new BinaryDictionary(dictFile.getAbsolutePath(),
105ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
106ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
107ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi    }
108ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi
/**
 * Checks {@code isValidDictionary()} in three situations: an existing dictionary file, the
 * same instance after {@code close()}, and a path whose dictionary has been deleted.
 */
public void testIsValidDictionary() {
    final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);

    // Existing file: valid while open, invalid once closed.
    final BinaryDictionary openedDictionary = getBinaryDictionary(dictFile);
    assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
            openedDictionary.isValidDictionary());
    openedDictionary.close();
    assertFalse("binaryDictionary must be invalid after closing.",
            openedDictionary.isValidDictionary());

    // Missing file: never valid.
    FileUtils.deleteRecursively(dictFile);
    final BinaryDictionary missingFileDictionary = getBinaryDictionary(dictFile);
    assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
            missingFileDictionary.isValidDictionary());
    missingFileDictionary.close();
}
1235b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi
1243b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi    public void testConstructingDictionaryOnMemory() {
125c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
1263b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        FileUtils.deleteRecursively(dictFile);
1273b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        assertFalse(dictFile.exists());
128ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
129c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic                true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE,
130c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic                FormatSpec.VERSION403, new HashMap<String, String>());
1313b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        assertTrue(binaryDictionary.isValidDictionary());
132c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion());
1333b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        final int probability = 100;
1343b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        addUnigramWord(binaryDictionary, "word", probability);
1353b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("word"));
1363b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        assertFalse(dictFile.exists());
1373b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        binaryDictionary.flush();
1383b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        assertTrue(dictFile.exists());
1393b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        assertTrue(binaryDictionary.isValidDictionary());
140c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion());
1413b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("word"));
1423b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi        binaryDictionary.close();
1433b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi    }
1443b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi
14551c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi    public void testAddTooLongWord() {
146c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
14751c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        final StringBuffer stringBuilder = new StringBuffer();
14829500ef4ba8e01f4c467a62399c8249d532ee82cMohammadinamul Sheik        for (int i = 0; i < BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH; i++) {
14951c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi            stringBuilder.append('a');
15051c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        }
15151c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        final String validLongWord = stringBuilder.toString();
15251c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        stringBuilder.append('a');
15351c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        final String invalidLongWord = stringBuilder.toString();
15451c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        final int probability = 100;
15551c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        addUnigramWord(binaryDictionary, "aaa", probability);
15651c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        addUnigramWord(binaryDictionary, validLongWord, probability);
15751c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        addUnigramWord(binaryDictionary, invalidLongWord, probability);
15851c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        // Too long short cut.
15912d80ebead6a1d7f704a5a3af3b6fe3313ceab05Dan Zivkovic        binaryDictionary.addUnigramEntry("a", probability, false /* isBeginningOfSentence */,
16005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu                false /* isNotAWord */, false /* isPossiblyOffensive */,
16151c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi                BinaryDictionary.NOT_A_VALID_TIMESTAMP);
16251c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        addUnigramWord(binaryDictionary, "abc", probability);
16351c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        final int updatedProbability = 200;
16451c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        // Update.
16551c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
16651c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
16751c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        addUnigramWord(binaryDictionary, "abc", updatedProbability);
16851c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi
16951c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
17051c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
1715f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency(invalidLongWord));
17251c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi        assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
17351c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi    }
17451c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi
175e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi    private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
1762fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            final int probability) {
17712d80ebead6a1d7f704a5a3af3b6fe3313ceab05Dan Zivkovic        binaryDictionary.addUnigramEntry(word, probability,
1781adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi                false /* isBeginningOfSentence */, false /* isNotAWord */,
17905172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu                false /* isPossiblyOffensive */,
18005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1812fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    }
1822fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
183e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi    private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
1842fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            final String word1, final int probability) {
185bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi        binaryDictionary.addNgramEntry(new NgramContext(new WordInfo(word0)), word1, probability,
1862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1872fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    }
1882fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
18912a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi    private static void addTrigramEntry(final BinaryDictionary binaryDictionary, final String word0,
19012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi            final String word1, final String word2, final int probability) {
19177fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic        binaryDictionary.addNgramEntry(
19277fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic                new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2,
19377fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic                probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
19412a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi    }
19512a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi
196e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi    private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
197e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi            final String word0, final String word1) {
198bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi        return binaryDictionary.isValidNgram(new NgramContext(new WordInfo(word0)), word1);
199e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi    }
200e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi
201e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi    private static int getBigramProbability(final BinaryDictionary binaryDictionary,
202e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi            final String word0,  final String word1) {
203bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi        return binaryDictionary.getNgramProbability(new NgramContext(new WordInfo(word0)), word1);
204e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi    }
205e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi
20612a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi    private static int getTrigramProbability(final BinaryDictionary binaryDictionary,
20712a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi            final String word0, final String word1, final String word2) {
20877fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic        return binaryDictionary.getNgramProbability(
20977fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic                new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2);
21012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi    }
21112a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi
2125b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi    public void testAddUnigramWord() {
213c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
2145b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        final int probability = 100;
2152fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aaa", probability);
2165b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        // Reallocate and create.
2172fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aab", probability);
2185b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        // Insert into children.
2192fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aac", probability);
2205b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        // Make terminal.
2212fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aa", probability);
2225b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        // Create children.
2232fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aaaa", probability);
2245b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        // Reallocate and make termianl.
2252fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "a", probability);
2265b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi
2275b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        final int updatedProbability = 200;
2285b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        // Update.
2292fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aaa", updatedProbability);
2305b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi
2315b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("aab"));
2325b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("aac"));
23378b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("aa"));
2345b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
2355b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("a"));
2365b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi        assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
23778b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi    }
23878b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi
23978b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi    public void testRandomlyAddUnigramWord() {
24078b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        final int wordCount = 1000;
24178b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        final int codePointSetSize = 50;
242bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        final long seed = System.currentTimeMillis();
243c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
24478b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi
245a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<String, Integer> probabilityMap = new HashMap<>();
24678b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        // Test a word that isn't contained within the dictionary.
24778b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        final Random random = new Random(seed);
24878b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
24978b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        for (int i = 0; i < wordCount; ++i) {
25078b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi            final String word = CodePointUtils.generateWord(random, codePointSet);
2514d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi            probabilityMap.put(word, random.nextInt(0xFF));
25278b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        }
25378b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        for (String word : probabilityMap.keySet()) {
2542fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            addUnigramWord(binaryDictionary, word, probabilityMap.get(word));
25578b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        }
25678b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        for (String word : probabilityMap.keySet()) {
25778b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi            assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
25878b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi        }
2595b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi    }
260cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi
261cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi    public void testAddBigramWords() {
262c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
263cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi
264cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi        final int unigramProbability = 100;
26564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi        final int bigramProbability = 150;
26664341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi        final int updatedBigramProbability = 200;
2672fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
2682fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "abb", unigramProbability);
2692fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
2702fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
2712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
2722fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
2732fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
274cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi
275e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
276e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
277e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
278e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
279ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
280ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
281ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
282ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
2834d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi
2842fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
285ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(updatedBigramProbability,
286ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                getBigramProbability(binaryDictionary, "aaa", "abb"));
287cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi
288e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
289e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
290e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
2914d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi        assertEquals(Dictionary.NOT_A_PROBABILITY,
292e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi                getBigramProbability(binaryDictionary, "bcc", "aaa"));
2934d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi        assertEquals(Dictionary.NOT_A_PROBABILITY,
294e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi                getBigramProbability(binaryDictionary, "bcc", "bbc"));
2954d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi        assertEquals(Dictionary.NOT_A_PROBABILITY,
296e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi                getBigramProbability(binaryDictionary, "aaa", "aaa"));
2974d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi
2984d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi        // Testing bigram link.
2992fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "abcde", unigramProbability);
3002fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "fghij", unigramProbability);
3012fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability);
3022fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "fgh", unigramProbability);
3032fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "abc", unigramProbability);
3042fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "f", unigramProbability);
30564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi
306ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abcde", "fghij"));
3074d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi        assertEquals(Dictionary.NOT_A_PROBABILITY,
308e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi                getBigramProbability(binaryDictionary, "abcde", "fgh"));
3092fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
310ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(updatedBigramProbability,
311ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                getBigramProbability(binaryDictionary, "abcde", "fghij"));
312cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi    }
31371cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi
31471cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi    public void testRandomlyAddBigramWords() {
31571cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi        final int wordCount = 100;
31671cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi        final int bigramCount = 1000;
31771cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi        final int codePointSetSize = 50;
318bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        final long seed = System.currentTimeMillis();
319bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        final Random random = new Random(seed);
320c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
321bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi
322a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<String> words = new ArrayList<>();
323a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
32471cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
325a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
326a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
327bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi
32871cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi        for (int i = 0; i < wordCount; ++i) {
32971cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi            final String word = CodePointUtils.generateWord(random, codePointSet);
33071cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi            words.add(word);
3314d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi            final int unigramProbability = random.nextInt(0xFF);
332bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            unigramProbabilities.put(word, unigramProbability);
3332fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            addUnigramWord(binaryDictionary, word, unigramProbability);
33471cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi        }
33571cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi
33671cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi        for (int i = 0; i < bigramCount; i++) {
337bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            final String word0 = words.get(random.nextInt(wordCount));
338bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            final String word1 = words.get(random.nextInt(wordCount));
339bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            if (TextUtils.equals(word0, word1)) {
340bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi                continue;
341bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            }
342a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka            final Pair<String, String> bigram = new Pair<>(word0, word1);
343bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            bigramWords.add(bigram);
34464341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            final int unigramProbability = unigramProbabilities.get(word1);
34564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            final int bigramProbability =
34664341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi                    unigramProbability + random.nextInt(0xFF - unigramProbability);
347bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            bigramProbabilities.put(bigram, bigramProbability);
3482fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
34971cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi        }
35071cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi
351bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        for (final Pair<String, String> bigram : bigramWords) {
352bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            final int bigramProbability = bigramProbabilities.get(bigram);
35364341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
354e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
355ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi            assertEquals(bigramProbability,
356ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                    getBigramProbability(binaryDictionary, bigram.first, bigram.second));
35771cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi        }
35871cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi    }
359d9f450ef00f09a9eccfc677968b46e072267a5f2Keisuke Kuroyanagi
36012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi    public void testAddTrigramWords() {
361c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
36212a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        final int unigramProbability = 100;
36312a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        final int trigramProbability = 150;
36412a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        final int updatedTrigramProbability = 200;
36512a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
36612a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        addUnigramWord(binaryDictionary, "abb", unigramProbability);
36712a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
36812a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi
36912a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        addBigramWords(binaryDictionary, "abb", "bcc", 10);
37012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        addBigramWords(binaryDictionary, "abb", "aaa", 10);
37112a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi
37212a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        addTrigramEntry(binaryDictionary, "aaa", "abb", "bcc", trigramProbability);
37312a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", trigramProbability);
37412a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi
37512a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        assertEquals(trigramProbability,
37612a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi                getTrigramProbability(binaryDictionary, "aaa", "abb", "bcc"));
37712a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        assertEquals(trigramProbability,
37812a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi                getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa"));
37912a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));
38012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi
38112a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", updatedTrigramProbability);
38212a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi        assertEquals(updatedTrigramProbability,
38312a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi                getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa"));
38412a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi    }
38512a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi
38615605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi    public void testFlushDictionary() {
387c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
388ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
38915605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi
39015605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        final int probability = 100;
3912fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aaa", probability);
3922fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "abcd", probability);
39315605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        // Close without flushing.
39415605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        binaryDictionary.close();
39515605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi
39615605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
39715605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
39815605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
39915605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi
400484fa7b59cb0659ac18fa68da5c7b641d9255be8Keisuke Kuroyanagi        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
401484fa7b59cb0659ac18fa68da5c7b641d9255be8Keisuke Kuroyanagi        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
40215605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi
4032fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aaa", probability);
4042fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "abcd", probability);
40515605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        binaryDictionary.flush();
40615605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        binaryDictionary.close();
40715605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi
408ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        binaryDictionary = getBinaryDictionary(dictFile);
40915605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
41015605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("abcd"));
4112fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "bcde", probability);
41215605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        binaryDictionary.flush();
41315605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        binaryDictionary.close();
41415605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi
415ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        binaryDictionary = getBinaryDictionary(dictFile);
41615605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        assertEquals(probability, binaryDictionary.getFrequency("bcde"));
41715605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi        binaryDictionary.close();
41815605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi    }
4192cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi
4202cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi    public void testFlushWithGCDictionary() {
421c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
422ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
4232cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi        final int unigramProbability = 100;
42464341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi        final int bigramProbability = 150;
4252fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
4262fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "abb", unigramProbability);
4272fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
4282fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
4292fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
4302fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
4312fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
4322cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi        binaryDictionary.flushWithGC();
4332cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi        binaryDictionary.close();
4342cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi
435ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        binaryDictionary = getBinaryDictionary(dictFile);
4362cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
4372cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi        assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
4382cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi        assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
439ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
440ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
441ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
442ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi        assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
443e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
444e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
445e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi        assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
4462cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi        binaryDictionary.flushWithGC();
4472cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi        binaryDictionary.close();
4482cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi    }
449f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi
450f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi    public void testAddBigramWordsAndFlashWithGC() {
451f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final int wordCount = 100;
452f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final int bigramCount = 1000;
453f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final int codePointSetSize = 30;
454bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        final long seed = System.currentTimeMillis();
455bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        final Random random = new Random(seed);
456f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi
457c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
458ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
459bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi
460a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<String> words = new ArrayList<>();
461a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
462f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
463a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
464a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
465bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi
466f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        for (int i = 0; i < wordCount; ++i) {
467f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            final String word = CodePointUtils.generateWord(random, codePointSet);
468f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            words.add(word);
469f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            final int unigramProbability = random.nextInt(0xFF);
470bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            unigramProbabilities.put(word, unigramProbability);
4712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            addUnigramWord(binaryDictionary, word, unigramProbability);
472f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        }
473f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi
474f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        for (int i = 0; i < bigramCount; i++) {
475bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            final String word0 = words.get(random.nextInt(wordCount));
476bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            final String word1 = words.get(random.nextInt(wordCount));
477bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            if (TextUtils.equals(word0, word1)) {
478bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi                continue;
479bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            }
480a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka            final Pair<String, String> bigram = new Pair<>(word0, word1);
481bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            bigramWords.add(bigram);
48264341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            final int unigramProbability = unigramProbabilities.get(word1);
48364341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            final int bigramProbability =
48464341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi                    unigramProbability + random.nextInt(0xFF - unigramProbability);
485bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            bigramProbabilities.put(bigram, bigramProbability);
4862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
487f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        }
488f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi
489f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        binaryDictionary.flushWithGC();
490f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        binaryDictionary.close();
491ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        binaryDictionary = getBinaryDictionary(dictFile);
49264341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi
493bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        for (final Pair<String, String> bigram : bigramWords) {
494bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi            final int bigramProbability = bigramProbabilities.get(bigram);
49564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
496e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
497ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi            assertEquals(bigramProbability,
498ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                    getBigramProbability(binaryDictionary, bigram.first, bigram.second));
499f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        }
500f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi    }
501f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi
5022fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    public void testRandomOperationsAndFlashWithGC() {
5031085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        final int maxUnigramCount = 5000;
5041085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        final int maxBigramCount = 10000;
5051085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        final HashMap<String, String> attributeMap = new HashMap<>();
5061085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
5071085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
5081085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi
509f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final int flashWithGCIterationCount = 50;
510f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final int operationCountInEachIteration = 200;
511f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final int initialUnigramCount = 100;
512f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final float addUnigramProb = 0.5f;
513f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final float addBigramProb = 0.8f;
514f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final int codePointSetSize = 30;
515f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi
516bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        final long seed = System.currentTimeMillis();
517f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final Random random = new Random(seed);
518c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403,
5191085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi                attributeMap);
520ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
521f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi
522a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<String> words = new ArrayList<>();
523a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
524f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
525a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
526a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
527f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        for (int i = 0; i < initialUnigramCount; ++i) {
528f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            final String word = CodePointUtils.generateWord(random, codePointSet);
529f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            words.add(word);
530f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            final int unigramProbability = random.nextInt(0xFF);
531f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            unigramProbabilities.put(word, unigramProbability);
5322fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            addUnigramWord(binaryDictionary, word, unigramProbability);
533f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        }
534f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        binaryDictionary.flushWithGC();
535f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        binaryDictionary.close();
536f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi
537f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
538ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi            binaryDictionary = getBinaryDictionary(dictFile);
539f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
540f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                // Add unigram.
541f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                if (random.nextFloat() < addUnigramProb) {
542f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    final String word = CodePointUtils.generateWord(random, codePointSet);
543f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    words.add(word);
544f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    final int unigramProbability = random.nextInt(0xFF);
545f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    unigramProbabilities.put(word, unigramProbability);
5462fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                    addUnigramWord(binaryDictionary, word, unigramProbability);
547f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                }
548f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                // Add bigram.
549f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                if (random.nextFloat() < addBigramProb && words.size() > 2) {
550f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    final int word0Index = random.nextInt(words.size());
551f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    int word1Index = random.nextInt(words.size() - 1);
552f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    if (word0Index <= word1Index) {
553f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                        word1Index++;
554f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    }
555f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    final String word0 = words.get(word0Index);
556f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    final String word1 = words.get(word1Index);
557bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi                    if (TextUtils.equals(word0, word1)) {
558bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi                        continue;
559bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi                    }
56064341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi                    final int unigramProbability = unigramProbabilities.get(word1);
56164341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi                    final int bigramProbability =
56264341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi                            unigramProbability + random.nextInt(0xFF - unigramProbability);
563a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka                    final Pair<String, String> bigram = new Pair<>(word0, word1);
564f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    bigramWords.add(bigram);
565f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    bigramProbabilities.put(bigram, bigramProbability);
5662fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                    addBigramWords(binaryDictionary, word0, word1, bigramProbability);
567f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                }
568f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            }
569f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi
570f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            // Test whether the all unigram operations are collectlly handled.
571f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            for (int i = 0; i < words.size(); i++) {
572f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                final String word = words.get(i);
573f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                final int unigramProbability = unigramProbabilities.get(word);
574f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
575f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            }
576f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            // Test whether the all bigram operations are collectlly handled.
577f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            for (int i = 0; i < bigramWords.size(); i++) {
578f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                final Pair<String, String> bigram = bigramWords.get(i);
579f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                final int probability;
580f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                if (bigramProbabilities.containsKey(bigram)) {
58177fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic                    probability = bigramProbabilities.get(bigram);
582f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                } else {
583f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                    probability = Dictionary.NOT_A_PROBABILITY;
584f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi                }
58564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi
586ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                assertEquals(probability,
587ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
58864341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi                assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
589e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi                        isValidBigram(binaryDictionary, bigram.first, bigram.second));
590f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            }
591f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            binaryDictionary.flushWithGC();
592f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi            binaryDictionary.close();
593f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi        }
594f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi    }
5953d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi
5963d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi    public void testAddManyUnigramsAndFlushWithGC() {
5973d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi        final int flashWithGCIterationCount = 3;
5983d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi        final int codePointSetSize = 50;
5993d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi
600bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        final long seed = System.currentTimeMillis();
6013d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi        final Random random = new Random(seed);
6023d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi
603c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
6043d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi
605a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<String> words = new ArrayList<>();
606a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
6073d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
6083d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi
6093d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi        BinaryDictionary binaryDictionary;
6103d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi        for (int i = 0; i < flashWithGCIterationCount; i++) {
611ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi            binaryDictionary = getBinaryDictionary(dictFile);
612b698e9c1fab9df8e1cd58f997ad62147522538fcKeisuke Kuroyanagi            while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
6133d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi                final String word = CodePointUtils.generateWord(random, codePointSet);
6143d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi                words.add(word);
6153d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi                final int unigramProbability = random.nextInt(0xFF);
6163d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi                unigramProbabilities.put(word, unigramProbability);
6172fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                addUnigramWord(binaryDictionary, word, unigramProbability);
6183d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi            }
6193d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi
6203d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi            for (int j = 0; j < words.size(); j++) {
6213d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi                final String word = words.get(j);
6223d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi                final int unigramProbability = unigramProbabilities.get(word);
6233d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
6243d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi            }
6253d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi
6263d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi            binaryDictionary.flushWithGC();
6273d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi            binaryDictionary.close();
6283d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi        }
6293d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi    }
63031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi
63131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi    public void testUnigramAndBigramCount() {
6321085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        final int maxUnigramCount = 5000;
6331085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        final int maxBigramCount = 10000;
6341085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        final HashMap<String, String> attributeMap = new HashMap<>();
6351085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount));
6361085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi        attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount));
6371085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi
63831097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        final int flashWithGCIterationCount = 10;
63931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        final int codePointSetSize = 50;
64031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        final int unigramCountPerIteration = 1000;
64131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        final int bigramCountPerIteration = 2000;
642bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi        final long seed = System.currentTimeMillis();
64331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        final Random random = new Random(seed);
644c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403,
6451085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi                attributeMap);
64631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi
647a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<String> words = new ArrayList<>();
648a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashSet<Pair<String, String>> bigrams = new HashSet<>();
64931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
65031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi
65131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        BinaryDictionary binaryDictionary;
65231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        for (int i = 0; i < flashWithGCIterationCount; i++) {
653ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi            binaryDictionary = getBinaryDictionary(dictFile);
65431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi            for (int j = 0; j < unigramCountPerIteration; j++) {
65531097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi                final String word = CodePointUtils.generateWord(random, codePointSet);
65631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi                words.add(word);
65731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi                final int unigramProbability = random.nextInt(0xFF);
6582fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                addUnigramWord(binaryDictionary, word, unigramProbability);
65931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi            }
66031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi            for (int j = 0; j < bigramCountPerIteration; j++) {
66131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi                final String word0 = words.get(random.nextInt(words.size()));
66231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi                final String word1 = words.get(random.nextInt(words.size()));
663bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi                if (TextUtils.equals(word0, word1)) {
664bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi                    continue;
665bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi                }
666a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka                bigrams.add(new Pair<>(word0, word1));
66731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi                final int bigramProbability = random.nextInt(0xF);
6682fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                addBigramWords(binaryDictionary, word0, word1, bigramProbability);
66931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi            }
670a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka            assertEquals(new HashSet<>(words).size(), Integer.parseInt(
671743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi                    binaryDictionary.getPropertyForGettingStats(
672743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi                            BinaryDictionary.UNIGRAM_COUNT_QUERY)));
673a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka            assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
674743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi                    binaryDictionary.getPropertyForGettingStats(
675743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi                            BinaryDictionary.BIGRAM_COUNT_QUERY)));
67631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi            binaryDictionary.flushWithGC();
677a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka            assertEquals(new HashSet<>(words).size(), Integer.parseInt(
678743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi                    binaryDictionary.getPropertyForGettingStats(
679743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi                            BinaryDictionary.UNIGRAM_COUNT_QUERY)));
680a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka            assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
681743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi                    binaryDictionary.getPropertyForGettingStats(
682743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi                            BinaryDictionary.BIGRAM_COUNT_QUERY)));
68331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi            binaryDictionary.close();
68431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi        }
68531097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi    }
6862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
68794d9a2309fbca6b1e42b6c57b9c9509182fe8a0bKeisuke Kuroyanagi    public void testGetWordProperties() {
6882fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        final long seed = System.currentTimeMillis();
6892fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        final Random random = new Random(seed);
6909221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi        final int UNIGRAM_COUNT = 1000;
6919221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi        final int BIGRAM_COUNT = 1000;
6922fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        final int codePointSetSize = 20;
6932fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
694c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403);
695ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi        final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile);
6962fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
69788fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
69888fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi                false /* isBeginningOfSentence */);
69994d9a2309fbca6b1e42b6c57b9c9509182fe8a0bKeisuke Kuroyanagi        assertFalse(invalidWordProperty.isValid());
7002fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
701a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<String> words = new ArrayList<>();
702a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<String, Integer> wordProbabilities = new HashMap<>();
703a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
704a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
7059221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi
7069221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi        for (int i = 0; i < UNIGRAM_COUNT; i++) {
7072fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            final String word = CodePointUtils.generateWord(random, codePointSet);
7082fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            final int unigramProbability = random.nextInt(0xFF);
7092fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            final boolean isNotAWord = random.nextBoolean();
71005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu            final boolean isPossiblyOffensive = random.nextBoolean();
7112fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            // TODO: Add tests for historical info.
712e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi            binaryDictionary.addUnigramEntry(word, unigramProbability,
71305172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu                    false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive,
7141adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi                    BinaryDictionary.NOT_A_VALID_TIMESTAMP);
7159221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
7169221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi                binaryDictionary.flushWithGC();
7179221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            }
7189221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            words.add(word);
7199221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            wordProbabilities.put(word, unigramProbability);
72088fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi            final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
72188fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi                    false /* isBeginningOfSentence */);
7225f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            assertEquals(word, wordProperty.mWord);
7235f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            assertTrue(wordProperty.isValid());
7245f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            assertEquals(isNotAWord, wordProperty.mIsNotAWord);
72505172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu            assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive);
726c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi            assertEquals(false, wordProperty.mHasNgrams);
7275f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
7289221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi        }
7299221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi
7309221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi        for (int i = 0; i < BIGRAM_COUNT; i++) {
7319221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            final int word0Index = random.nextInt(wordProbabilities.size());
7329221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            final int word1Index = random.nextInt(wordProbabilities.size());
7339221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            if (word0Index == word1Index) {
7349221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi                continue;
7359221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            }
7369221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            final String word0 = words.get(word0Index);
7379221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            final String word1 = words.get(word1Index);
73864341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            final int unigramProbability = wordProbabilities.get(word1);
73964341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            final int bigramProbability =
74064341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi                    unigramProbability + random.nextInt(0xFF - unigramProbability);
741e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
7429221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
7439221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi                binaryDictionary.flushWithGC();
7449221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            }
7459221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            if (!bigrams.containsKey(word0)) {
746a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka                final HashSet<String> bigramWord1s = new HashSet<>();
7479221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi                bigrams.put(word0, bigramWord1s);
7489221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            }
7499221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            bigrams.get(word0).add(word1);
750a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka            bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability);
7519221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi        }
7529221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi
7539221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi        for (int i = 0; i < words.size(); i++) {
7549221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            final String word0 = words.get(i);
7559221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            if (!bigrams.containsKey(word0)) {
7569221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi                continue;
7579221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            }
7589221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            final HashSet<String> bigramWord1s = bigrams.get(word0);
75988fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi            final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
76088fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi                    false /* isBeginningOfSentence */);
761c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi            assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size());
762c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi            // TODO: Support ngram.
763c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi            for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
764c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi                final String word1 = bigramTarget.mWord;
7659221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi                assertTrue(bigramWord1s.contains(word1));
766ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
767ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                assertEquals(bigramProbability, bigramTarget.getProbability());
7689221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi            }
7692fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa        }
7702fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa    }
7712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa
772941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi    public void testIterateAllWords() {
773941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        final long seed = System.currentTimeMillis();
774941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        final Random random = new Random(seed);
775941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        final int UNIGRAM_COUNT = 1000;
776941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        final int BIGRAM_COUNT = 1000;
777941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        final int codePointSetSize = 20;
778941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
779c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
780941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi
78188fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
78288fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi                false /* isBeginningOfSentence */);
783941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        assertFalse(invalidWordProperty.isValid());
784941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi
785a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<String> words = new ArrayList<>();
786a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>();
787a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
788941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
789a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka                new HashMap<>();
790941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi
791941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        for (int i = 0; i < UNIGRAM_COUNT; i++) {
792941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            final String word = CodePointUtils.generateWord(random, codePointSet);
793941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            final int unigramProbability = random.nextInt(0xFF);
794941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            addUnigramWord(binaryDictionary, word, unigramProbability);
795941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
796941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi                binaryDictionary.flushWithGC();
797941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            }
798941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            words.add(word);
799941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            wordProbabilitiesToCheckLater.put(word, unigramProbability);
800941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        }
801941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi
802941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        for (int i = 0; i < BIGRAM_COUNT; i++) {
803941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
804941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
805941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            if (word0Index == word1Index) {
806941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi                continue;
807941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            }
808941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            final String word0 = words.get(word0Index);
809941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            final String word1 = words.get(word1Index);
81064341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
81164341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi            final int bigramProbability =
81264341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi                    unigramProbability + random.nextInt(0xFF - unigramProbability);
813e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
814941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
815941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi                binaryDictionary.flushWithGC();
816941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            }
817941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            if (!bigrams.containsKey(word0)) {
818a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka                final HashSet<String> bigramWord1s = new HashSet<>();
819941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi                bigrams.put(word0, bigramWord1s);
820941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            }
821941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            bigrams.get(word0).add(word1);
822a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka            bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability);
823941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        }
824941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi
825a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final HashSet<String> wordSet = new HashSet<>(words);
826941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        final HashSet<Pair<String, String>> bigramSet =
827a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka                new HashSet<>(bigramProbabilitiesToCheckLater.keySet());
828941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        int token = 0;
829941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        do {
830941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            final BinaryDictionary.GetNextWordPropertyResult result =
831941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi                    binaryDictionary.getNextWordProperty(token);
832941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            final WordProperty wordProperty = result.mWordProperty;
8335f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            final String word0 = wordProperty.mWord;
834941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
835941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi                    wordProperty.mProbabilityInfo.mProbability);
836941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            wordSet.remove(word0);
837941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            final HashSet<String> bigramWord1s = bigrams.get(word0);
838c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi            // TODO: Support ngram.
8392522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi            if (wordProperty.mHasNgrams) {
8402522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi                for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
8412522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi                    final String word1 = bigramTarget.mWord;
8422522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi                    assertTrue(bigramWord1s.contains(word1));
8432522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi                    final Pair<String, String> bigram = new Pair<>(word0, word1);
844ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                    final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
845ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                    assertEquals(bigramProbability, bigramTarget.getProbability());
8462522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi                    bigramSet.remove(bigram);
84764341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi                }
848941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            }
849941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi            token = result.mNextToken;
850941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        } while (token != 0);
851941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        assertTrue(wordSet.isEmpty());
852941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi        assertTrue(bigramSet.isEmpty());
853941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi    }
854941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi
85505172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu    public void testPossiblyOffensiveAttributeMaintained() {
85605172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu        final BinaryDictionary binaryDictionary =
857ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi                getEmptyBinaryDictionary(FormatSpec.VERSION403);
85812d80ebead6a1d7f704a5a3af3b6fe3313ceab05Dan Zivkovic        binaryDictionary.addUnigramEntry("ddd", 100, false, true, true, 0);
85905172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu        WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false);
86005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu        assertEquals(true, wordProperty.mIsPossiblyOffensive);
86105172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu    }
8625f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka
8631adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi    public void testBeginningOfSentence() {
864c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic        final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403);
8651adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi        final int dummyProbability = 0;
866bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi        final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE;
8671adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi        final int bigramProbability = 200;
8681adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi        addUnigramWord(binaryDictionary, "aaa", dummyProbability);
869bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi        binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability,
8701adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
8711adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi        assertEquals(bigramProbability,
872bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi                binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa"));
873bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi        binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability,
8741adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
8751adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi        addUnigramWord(binaryDictionary, "bbb", dummyProbability);
876bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi        binaryDictionary.addNgramEntry(beginningOfSentenceContext, "bbb", bigramProbability,
8771adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
8781adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi        binaryDictionary.flushWithGC();
8791adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi        assertEquals(bigramProbability,
880bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi                binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa"));
8811adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi        assertEquals(bigramProbability,
882bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi                binaryDictionary.getNgramProbability(beginningOfSentenceContext, "bbb"));
8831adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi    }
8844be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi}
885