14be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi/* 24be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * Copyright (C) 2013 The Android Open Source Project 34be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * 44be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 54be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * you may not use this file except in compliance with the License. 64be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * You may obtain a copy of the License at 74be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * 84be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 94be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * 104be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 114be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 124be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 134be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * See the License for the specific language governing permissions and 144be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi * limitations under the License. 154be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi */ 164be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi 174be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagipackage com.android.inputmethod.latin; 184be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi 194be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport android.test.AndroidTestCase; 204be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport android.test.suitebuilder.annotation.LargeTest; 21bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagiimport android.text.TextUtils; 22f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagiimport android.util.Pair; 234be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi 24bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagiimport com.android.inputmethod.latin.NgramContext.WordInfo; 2554a1b8ec54c0a001cae93af43e25a8e35de02ff1Ken Wakasaimport com.android.inputmethod.latin.common.CodePointUtils; 265b91b551e5ffaf2c2e691dfbd434f21c82293986Jean Chalardimport com.android.inputmethod.latin.common.FileUtils; 271085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.DictionaryHeader; 284be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.FormatSpec; 29516f86815ddec465e3d3ff59540d26913b05236fKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.WeightedString; 305f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.WordProperty; 31e784148ae6872942434eaa55ca32b4c6442cc8e8Keisuke Kuroyanagiimport com.android.inputmethod.latin.utils.BinaryDictionaryUtils; 324be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi 334be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport java.io.File; 344be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport java.io.IOException; 3571cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagiimport java.util.ArrayList; 364be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport java.util.HashMap; 3731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagiimport java.util.HashSet; 384be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagiimport java.util.Locale; 3978b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagiimport java.util.Random; 404be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi 414be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi@LargeTest 424be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagipublic class BinaryDictionaryTests extends AndroidTestCase { 434be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 444be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi private static final String TEST_LOCALE = "test"; 45ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi private static final String DICTIONARY_ID = "TestBinaryDictionary"; 464be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi 47ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi private HashSet<File> mDictFilesToBeDeleted = new HashSet<>(); 48ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi 49ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi @Override 50ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi protected void setUp() throws Exception { 51ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi super.setUp(); 52ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi mDictFilesToBeDeleted.clear(); 53ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi } 54ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi 55ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi @Override 56ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi protected void tearDown() throws Exception { 57ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi for (final File dictFile : mDictFilesToBeDeleted) { 58ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi dictFile.delete(); 59ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi } 60ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi mDictFilesToBeDeleted.clear(); 61ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi super.tearDown(); 62ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi } 63ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi 64ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi private File createEmptyDictionaryAndGetFile(final int formatVersion) { 651085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi return createEmptyDictionaryWithAttributesAndGetFile(formatVersion, 661085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi new HashMap<String, String>()); 671085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi } 681085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi 691085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi private File createEmptyDictionaryWithAttributesAndGetFile(final int formatVersion, 701085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi final HashMap<String, String> attributeMap) { 71ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi try { 72ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi final File dictFile = createEmptyVer4DictionaryAndGetFile(formatVersion, 73ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi attributeMap); 74ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi mDictFilesToBeDeleted.add(dictFile); 75ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi return dictFile; 76ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi } catch (final IOException e) { 77ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi fail(e.toString()); 782fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa } 79ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi return null; 802fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa } 812fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 821085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi private File createEmptyVer4DictionaryAndGetFile(final int formatVersion, 831085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi final HashMap<String, String> attributeMap) throws IOException { 84ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi final File file = File.createTempFile(DICTIONARY_ID, TEST_DICT_FILE_EXTENSION, 854be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi getContext().getCacheDir()); 862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa file.delete(); 872fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa file.mkdir(); 88455dc84cf2c6526329b535f30000ea45b7d4d4d7Keisuke Kuroyanagi if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, 8943cf9076b2d053c554941e55f6073b8f586c510bJean Chalard Locale.ENGLISH, attributeMap)) { 905ef6209656c51df0f0542d2a75c2df93c8d0f027Keisuke Kuroyanagi return file; 915ef6209656c51df0f0542d2a75c2df93c8d0f027Keisuke Kuroyanagi } 925f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka throw new IOException("Empty dictionary " + file.getAbsolutePath() 935f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka + " cannot be created. Format version: " + formatVersion); 944be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi } 954be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi 965f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka private static BinaryDictionary getBinaryDictionary(final File dictFile) { 97ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi return new BinaryDictionary(dictFile.getAbsolutePath(), 98ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 99ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 100ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi } 101ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi 102ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi private BinaryDictionary getEmptyBinaryDictionary(final int formatVersion) { 103ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi final File dictFile = createEmptyDictionaryAndGetFile(formatVersion); 104ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi return new BinaryDictionary(dictFile.getAbsolutePath(), 105ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 106ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 107ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi } 108ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi 1094be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi public void testIsValidDictionary() { 110c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); 111ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); 1124be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi assertTrue("binaryDictionary must be valid for existing valid dictionary file.", 1134be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi binaryDictionary.isValidDictionary()); 1144be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi binaryDictionary.close(); 1154be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi assertFalse("binaryDictionary must be invalid after closing.", 1164be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi binaryDictionary.isValidDictionary()); 1172fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa FileUtils.deleteRecursively(dictFile); 118ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi binaryDictionary = getBinaryDictionary(dictFile); 1194be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi assertFalse("binaryDictionary must be invalid for not existing dictionary file.", 1204be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi binaryDictionary.isValidDictionary()); 1214be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi binaryDictionary.close(); 1224be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi } 1235b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi 1243b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi public void testConstructingDictionaryOnMemory() { 125c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); 1263b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi FileUtils.deleteRecursively(dictFile); 1273b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi assertFalse(dictFile.exists()); 128ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 129c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, 130c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic FormatSpec.VERSION403, new HashMap<String, String>()); 1313b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi assertTrue(binaryDictionary.isValidDictionary()); 132c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion()); 1333b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi final int probability = 100; 1343b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi addUnigramWord(binaryDictionary, "word", probability); 1353b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("word")); 1363b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi assertFalse(dictFile.exists()); 1373b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi binaryDictionary.flush(); 1383b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi assertTrue(dictFile.exists()); 1393b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi assertTrue(binaryDictionary.isValidDictionary()); 140c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic assertEquals(FormatSpec.VERSION403, binaryDictionary.getFormatVersion()); 1413b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("word")); 1423b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi binaryDictionary.close(); 1433b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi } 1443b7984752c88bff157016a09158dc92d94ed401dKeisuke Kuroyanagi 14551c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi public void testAddTooLongWord() { 146c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); 14751c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi final StringBuffer stringBuilder = new StringBuffer(); 14829500ef4ba8e01f4c467a62399c8249d532ee82cMohammadinamul Sheik for (int i = 0; i < BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH; i++) { 14951c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi stringBuilder.append('a'); 15051c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi } 15151c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi final String validLongWord = stringBuilder.toString(); 15251c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi stringBuilder.append('a'); 15351c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi final String invalidLongWord = stringBuilder.toString(); 15451c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi final int probability = 100; 15551c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi addUnigramWord(binaryDictionary, "aaa", probability); 15651c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi addUnigramWord(binaryDictionary, validLongWord, probability); 15751c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi addUnigramWord(binaryDictionary, invalidLongWord, probability); 15851c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi // Too long short cut. 15912d80ebead6a1d7f704a5a3af3b6fe3313ceab05Dan Zivkovic binaryDictionary.addUnigramEntry("a", probability, false /* isBeginningOfSentence */, 16005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu false /* isNotAWord */, false /* isPossiblyOffensive */, 16151c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi BinaryDictionary.NOT_A_VALID_TIMESTAMP); 16251c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi addUnigramWord(binaryDictionary, "abc", probability); 16351c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi final int updatedProbability = 200; 16451c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi // Update. 16551c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi addUnigramWord(binaryDictionary, validLongWord, updatedProbability); 16651c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability); 16751c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi addUnigramWord(binaryDictionary, "abc", updatedProbability); 16851c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi 16951c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("aaa")); 17051c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord)); 1715f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency(invalidLongWord)); 17251c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi assertEquals(updatedProbability, binaryDictionary.getFrequency("abc")); 17351c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi } 17451c5ec10f9e5ab748e9aeec23ce5fc52c976b0e5Keisuke Kuroyanagi 175e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, 1762fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa final int probability) { 17712d80ebead6a1d7f704a5a3af3b6fe3313ceab05Dan Zivkovic binaryDictionary.addUnigramEntry(word, probability, 1781adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi false /* isBeginningOfSentence */, false /* isNotAWord */, 17905172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu false /* isPossiblyOffensive */, 18005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1812fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa } 1822fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 183e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, 1842fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa final String word1, final int probability) { 185bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi binaryDictionary.addNgramEntry(new NgramContext(new WordInfo(word0)), word1, probability, 1862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1872fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa } 1882fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 18912a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi private static void addTrigramEntry(final BinaryDictionary binaryDictionary, final String word0, 19012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi final String word1, final String word2, final int probability) { 19177fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic binaryDictionary.addNgramEntry( 19277fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2, 19377fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic probability, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 19412a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi } 19512a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi 196e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi private static boolean isValidBigram(final BinaryDictionary binaryDictionary, 197e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi final String word0, final String word1) { 198bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi return binaryDictionary.isValidNgram(new NgramContext(new WordInfo(word0)), word1); 199e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi } 200e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi 201e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi private static int getBigramProbability(final BinaryDictionary binaryDictionary, 202e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi final String word0, final String word1) { 203bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi return binaryDictionary.getNgramProbability(new NgramContext(new WordInfo(word0)), word1); 204e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi } 205e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi 20612a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi private static int getTrigramProbability(final BinaryDictionary binaryDictionary, 20712a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi final String word0, final String word1, final String word2) { 20877fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic return binaryDictionary.getNgramProbability( 20977fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic new NgramContext(new WordInfo(word1), new WordInfo(word0)), word2); 21012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi } 21112a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi 2125b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi public void testAddUnigramWord() { 213c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); 2145b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi final int probability = 100; 2152fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aaa", probability); 2165b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi // Reallocate and create. 2172fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aab", probability); 2185b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi // Insert into children. 2192fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aac", probability); 2205b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi // Make terminal. 2212fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aa", probability); 2225b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi // Create children. 2232fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aaaa", probability); 2245b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi // Reallocate and make termianl. 2252fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "a", probability); 2265b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi 2275b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi final int updatedProbability = 200; 2285b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi // Update. 2292fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aaa", updatedProbability); 2305b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi 2315b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("aab")); 2325b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("aac")); 23378b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("aa")); 2345b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("aaaa")); 2355b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("a")); 2365b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa")); 23778b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi } 23878b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi 23978b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi public void testRandomlyAddUnigramWord() { 24078b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi final int wordCount = 1000; 24178b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi final int codePointSetSize = 50; 242bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final long seed = System.currentTimeMillis(); 243c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); 24478b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi 245a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<String, Integer> probabilityMap = new HashMap<>(); 24678b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi // Test a word that isn't contained within the dictionary. 24778b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi final Random random = new Random(seed); 24878b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 24978b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi for (int i = 0; i < wordCount; ++i) { 25078b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi final String word = CodePointUtils.generateWord(random, codePointSet); 2514d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi probabilityMap.put(word, random.nextInt(0xFF)); 25278b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi } 25378b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi for (String word : probabilityMap.keySet()) { 2542fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, word, probabilityMap.get(word)); 25578b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi } 25678b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi for (String word : probabilityMap.keySet()) { 25778b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); 25878b55a31cb158b1e14ccf678133269b0f99c7f9aKeisuke Kuroyanagi } 2595b51355b9ebf3bd1530074e9c7804f301d1ad5c5Keisuke Kuroyanagi } 260cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi 261cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi public void testAddBigramWords() { 262c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); 263cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi 264cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi final int unigramProbability = 100; 26564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int bigramProbability = 150; 26664341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int updatedBigramProbability = 200; 2672fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aaa", unigramProbability); 2682fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "abb", unigramProbability); 2692fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "bcc", unigramProbability); 2702fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 2712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 2722fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 2732fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 274cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi 275e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 276e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); 277e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); 278e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); 279ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); 280ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); 281ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); 282ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); 2834d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi 2842fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); 285ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(updatedBigramProbability, 286ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi getBigramProbability(binaryDictionary, "aaa", "abb")); 287cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi 288e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); 289e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); 290e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); 2914d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi assertEquals(Dictionary.NOT_A_PROBABILITY, 292e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi getBigramProbability(binaryDictionary, "bcc", "aaa")); 2934d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi assertEquals(Dictionary.NOT_A_PROBABILITY, 294e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi getBigramProbability(binaryDictionary, "bcc", "bbc")); 2954d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi assertEquals(Dictionary.NOT_A_PROBABILITY, 296e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi getBigramProbability(binaryDictionary, "aaa", "aaa")); 2974d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi 2984d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi // Testing bigram link. 2992fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "abcde", unigramProbability); 3002fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "fghij", unigramProbability); 3012fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability); 3022fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "fgh", unigramProbability); 3032fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "abc", unigramProbability); 3042fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "f", unigramProbability); 30564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi 306ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abcde", "fghij")); 3074d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi assertEquals(Dictionary.NOT_A_PROBABILITY, 308e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi getBigramProbability(binaryDictionary, "abcde", "fgh")); 3092fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); 310ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(updatedBigramProbability, 311ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi getBigramProbability(binaryDictionary, "abcde", "fghij")); 312cd6a0430c7db5322f9ae680dfffe3d744395298cKeisuke Kuroyanagi } 31371cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi 31471cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi public void testRandomlyAddBigramWords() { 31571cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi final int wordCount = 100; 31671cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi final int bigramCount = 1000; 31771cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi final int codePointSetSize = 50; 318bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final long seed = System.currentTimeMillis(); 319bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final Random random = new Random(seed); 320c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); 321bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi 322a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<String> words = new ArrayList<>(); 323a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 32471cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 325a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 326a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 327bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi 32871cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi for (int i = 0; i < wordCount; ++i) { 32971cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi final String word = CodePointUtils.generateWord(random, codePointSet); 33071cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi words.add(word); 3314d02a2d44db94985c9f079cdd58c7c51d3e557eeKeisuke Kuroyanagi final int unigramProbability = random.nextInt(0xFF); 332bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi unigramProbabilities.put(word, unigramProbability); 3332fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, word, unigramProbability); 33471cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi } 33571cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi 33671cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi for (int i = 0; i < bigramCount; i++) { 337bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final String word0 = words.get(random.nextInt(wordCount)); 338bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final String word1 = words.get(random.nextInt(wordCount)); 339bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi if (TextUtils.equals(word0, word1)) { 340bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi continue; 341bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi } 342a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final Pair<String, String> bigram = new Pair<>(word0, word1); 343bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi bigramWords.add(bigram); 34464341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int unigramProbability = unigramProbabilities.get(word1); 34564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int bigramProbability = 34664341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi unigramProbability + random.nextInt(0xFF - unigramProbability); 347bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi bigramProbabilities.put(bigram, bigramProbability); 3482fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, word0, word1, bigramProbability); 34971cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi } 35071cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi 351bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi for (final Pair<String, String> bigram : bigramWords) { 352bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final int bigramProbability = bigramProbabilities.get(bigram); 35364341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 354e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi isValidBigram(binaryDictionary, bigram.first, bigram.second)); 355ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, 356ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 35771cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi } 35871cce2dd3efd51bf0113e09d6e619400fb45478bKeisuke Kuroyanagi } 359d9f450ef00f09a9eccfc677968b46e072267a5f2Keisuke Kuroyanagi 36012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi public void testAddTrigramWords() { 361c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); 36212a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi final int unigramProbability = 100; 36312a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi final int trigramProbability = 150; 36412a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi final int updatedTrigramProbability = 200; 36512a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi addUnigramWord(binaryDictionary, "aaa", unigramProbability); 36612a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi addUnigramWord(binaryDictionary, "abb", unigramProbability); 36712a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi addUnigramWord(binaryDictionary, "bcc", unigramProbability); 36812a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi 36912a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi addBigramWords(binaryDictionary, "abb", "bcc", 10); 37012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi addBigramWords(binaryDictionary, "abb", "aaa", 10); 37112a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi 37212a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi addTrigramEntry(binaryDictionary, "aaa", "abb", "bcc", trigramProbability); 37312a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", trigramProbability); 37412a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi 37512a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi assertEquals(trigramProbability, 37612a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi getTrigramProbability(binaryDictionary, "aaa", "abb", "bcc")); 37712a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi assertEquals(trigramProbability, 37812a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa")); 37912a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi assertFalse(isValidBigram(binaryDictionary, "aaa", "abb")); 38012a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi 38112a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi addTrigramEntry(binaryDictionary, "bcc", "abb", "aaa", updatedTrigramProbability); 38212a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi assertEquals(updatedTrigramProbability, 38312a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi getTrigramProbability(binaryDictionary, "bcc", "abb", "aaa")); 38412a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi } 38512a5c959b3767b001a5c3a757bfd92965cde91ceKeisuke Kuroyanagi 38615605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi public void testFlushDictionary() { 387c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); 388ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); 38915605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi 39015605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi final int probability = 100; 3912fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aaa", probability); 3922fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "abcd", probability); 39315605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi // Close without flushing. 39415605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi binaryDictionary.close(); 39515605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi 39615605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 39715605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 39815605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 39915605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi 400484fa7b59cb0659ac18fa68da5c7b641d9255be8Keisuke Kuroyanagi assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); 401484fa7b59cb0659ac18fa68da5c7b641d9255be8Keisuke Kuroyanagi assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); 40215605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi 4032fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aaa", probability); 4042fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "abcd", probability); 40515605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi binaryDictionary.flush(); 40615605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi binaryDictionary.close(); 40715605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi 408ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi binaryDictionary = getBinaryDictionary(dictFile); 40915605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("aaa")); 41015605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("abcd")); 4112fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "bcde", probability); 41215605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi binaryDictionary.flush(); 41315605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi binaryDictionary.close(); 41415605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi 415ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi binaryDictionary = getBinaryDictionary(dictFile); 41615605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi assertEquals(probability, binaryDictionary.getFrequency("bcde")); 41715605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi binaryDictionary.close(); 41815605437548f7187c33bc8f260f80fae4303b460Keisuke Kuroyanagi } 4192cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi 4202cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi public void testFlushWithGCDictionary() { 421c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); 422ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); 4232cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi final int unigramProbability = 100; 42464341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int bigramProbability = 150; 4252fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "aaa", unigramProbability); 4262fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "abb", unigramProbability); 4272fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, "bcc", unigramProbability); 4282fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 4292fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 4302fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 4312fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 4322cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi binaryDictionary.flushWithGC(); 4332cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi binaryDictionary.close(); 4342cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi 435ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi binaryDictionary = getBinaryDictionary(dictFile); 4362cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 4372cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); 4382cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); 439ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); 440ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); 441ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); 442ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); 443e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); 444e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); 445e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); 4462cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi binaryDictionary.flushWithGC(); 4472cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi binaryDictionary.close(); 4482cfe7f9e3b8a09aa00b18efcb82a1b3d5fed43f0Keisuke Kuroyanagi } 449f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi 450f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi public void testAddBigramWordsAndFlashWithGC() { 451f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int wordCount = 100; 452f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int bigramCount = 1000; 453f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int codePointSetSize = 30; 454bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final long seed = System.currentTimeMillis(); 455bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final Random random = new Random(seed); 456f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi 457c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); 458ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); 459bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi 460a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<String> words = new ArrayList<>(); 461a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 462f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 463a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 464a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 465bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi 466f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi for (int i = 0; i < wordCount; ++i) { 467f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final String word = CodePointUtils.generateWord(random, codePointSet); 468f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi words.add(word); 469f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int unigramProbability = random.nextInt(0xFF); 470bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi unigramProbabilities.put(word, unigramProbability); 4712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, word, unigramProbability); 472f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 473f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi 474f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi for (int i = 0; i < bigramCount; i++) { 475bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final String word0 = words.get(random.nextInt(wordCount)); 476bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final String word1 = words.get(random.nextInt(wordCount)); 477bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi if (TextUtils.equals(word0, word1)) { 478bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi continue; 479bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi } 480a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final Pair<String, String> bigram = new Pair<>(word0, word1); 481bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi bigramWords.add(bigram); 48264341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int unigramProbability = unigramProbabilities.get(word1); 48364341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int bigramProbability = 48464341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi unigramProbability + random.nextInt(0xFF - unigramProbability); 485bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi bigramProbabilities.put(bigram, bigramProbability); 4862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, word0, word1, bigramProbability); 487f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 488f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi 489f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi binaryDictionary.flushWithGC(); 490f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi binaryDictionary.close(); 491ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi binaryDictionary = getBinaryDictionary(dictFile); 49264341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi 493bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi for (final Pair<String, String> bigram : bigramWords) { 494bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final int bigramProbability = bigramProbabilities.get(bigram); 49564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 496e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi isValidBigram(binaryDictionary, bigram.first, bigram.second)); 497ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, 498ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 499f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 500f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 501f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi 5022fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa public void testRandomOperationsAndFlashWithGC() { 5031085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi final int maxUnigramCount = 5000; 5041085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi final int maxBigramCount = 10000; 5051085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi final HashMap<String, String> attributeMap = new HashMap<>(); 5061085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); 5071085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); 5081085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi 509f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int flashWithGCIterationCount = 50; 510f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int operationCountInEachIteration = 200; 511f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int initialUnigramCount = 100; 512f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final float addUnigramProb = 0.5f; 513f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final float addBigramProb = 0.8f; 514f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int codePointSetSize = 30; 515f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi 516bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final long seed = System.currentTimeMillis(); 517f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final Random random = new Random(seed); 518c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403, 5191085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi attributeMap); 520ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); 521f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi 522a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<String> words = new ArrayList<>(); 523a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 524f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 525a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 526a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 527f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi for (int i = 0; i < initialUnigramCount; ++i) { 528f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final String word = CodePointUtils.generateWord(random, codePointSet); 529f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi words.add(word); 530f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int unigramProbability = random.nextInt(0xFF); 531f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi unigramProbabilities.put(word, unigramProbability); 5322fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, word, unigramProbability); 533f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 534f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi binaryDictionary.flushWithGC(); 535f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi binaryDictionary.close(); 536f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi 537f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { 538ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi binaryDictionary = getBinaryDictionary(dictFile); 539f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { 540f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi // Add unigram. 541f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi if (random.nextFloat() < addUnigramProb) { 542f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final String word = CodePointUtils.generateWord(random, codePointSet); 543f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi words.add(word); 544f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int unigramProbability = random.nextInt(0xFF); 545f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi unigramProbabilities.put(word, unigramProbability); 5462fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, word, unigramProbability); 547f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 548f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi // Add bigram. 549f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi if (random.nextFloat() < addBigramProb && words.size() > 2) { 550f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int word0Index = random.nextInt(words.size()); 551f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi int word1Index = random.nextInt(words.size() - 1); 552f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi if (word0Index <= word1Index) { 553f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi word1Index++; 554f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 555f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final String word0 = words.get(word0Index); 556f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final String word1 = words.get(word1Index); 557bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi if (TextUtils.equals(word0, word1)) { 558bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi continue; 559bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi } 56064341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int unigramProbability = unigramProbabilities.get(word1); 56164341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int bigramProbability = 56264341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi unigramProbability + random.nextInt(0xFF - unigramProbability); 563a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final Pair<String, String> bigram = new Pair<>(word0, word1); 564f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi bigramWords.add(bigram); 565f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi bigramProbabilities.put(bigram, bigramProbability); 5662fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, word0, word1, bigramProbability); 567f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 568f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 569f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi 570f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi // Test whether the all unigram operations are collectlly handled. 571f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi for (int i = 0; i < words.size(); i++) { 572f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final String word = words.get(i); 573f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int unigramProbability = unigramProbabilities.get(word); 574f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 575f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 576f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi // Test whether the all bigram operations are collectlly handled. 577f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi for (int i = 0; i < bigramWords.size(); i++) { 578f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final Pair<String, String> bigram = bigramWords.get(i); 579f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi final int probability; 580f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi if (bigramProbabilities.containsKey(bigram)) { 58177fd5dbf3e432356dd16eb428c740e446c04373eDan Zivkovic probability = bigramProbabilities.get(bigram); 582f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } else { 583f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi probability = Dictionary.NOT_A_PROBABILITY; 584f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 58564341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi 586ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(probability, 587ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 58864341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi assertEquals(probability != Dictionary.NOT_A_PROBABILITY, 589e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi isValidBigram(binaryDictionary, bigram.first, bigram.second)); 590f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 591f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi binaryDictionary.flushWithGC(); 592f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi binaryDictionary.close(); 593f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 594f3cd38f2c71f013fcd1cdb43f3dd68e8335230ecKeisuke Kuroyanagi } 5953d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi 5963d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi public void testAddManyUnigramsAndFlushWithGC() { 5973d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi final int flashWithGCIterationCount = 3; 5983d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi final int codePointSetSize = 50; 5993d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi 600bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final long seed = System.currentTimeMillis(); 6013d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi final Random random = new Random(seed); 6023d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi 603c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); 6043d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi 605a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<String> words = new ArrayList<>(); 606a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 6073d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 6083d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi 6093d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi BinaryDictionary binaryDictionary; 6103d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi for (int i = 0; i < flashWithGCIterationCount; i++) { 611ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi binaryDictionary = getBinaryDictionary(dictFile); 612b698e9c1fab9df8e1cd58f997ad62147522538fcKeisuke Kuroyanagi while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 6133d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi final String word = CodePointUtils.generateWord(random, codePointSet); 6143d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi words.add(word); 6153d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi final int unigramProbability = random.nextInt(0xFF); 6163d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi unigramProbabilities.put(word, unigramProbability); 6172fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, word, unigramProbability); 6183d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi } 6193d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi 6203d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi for (int j = 0; j < words.size(); j++) { 6213d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi final String word = words.get(j); 6223d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi final int unigramProbability = unigramProbabilities.get(word); 6233d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 6243d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi } 6253d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi 6263d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi binaryDictionary.flushWithGC(); 6273d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi binaryDictionary.close(); 6283d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi } 6293d2802ef2e28a45374ebb70fa2303093571101adKeisuke Kuroyanagi } 63031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi 63131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi public void testUnigramAndBigramCount() { 6321085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi final int maxUnigramCount = 5000; 6331085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi final int maxBigramCount = 10000; 6341085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi final HashMap<String, String> attributeMap = new HashMap<>(); 6351085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi attributeMap.put(DictionaryHeader.MAX_UNIGRAM_COUNT_KEY, String.valueOf(maxUnigramCount)); 6361085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi attributeMap.put(DictionaryHeader.MAX_BIGRAM_COUNT_KEY, String.valueOf(maxBigramCount)); 6371085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi 63831097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final int flashWithGCIterationCount = 10; 63931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final int codePointSetSize = 50; 64031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final int unigramCountPerIteration = 1000; 64131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final int bigramCountPerIteration = 2000; 642bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi final long seed = System.currentTimeMillis(); 64331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final Random random = new Random(seed); 644c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final File dictFile = createEmptyDictionaryWithAttributesAndGetFile(FormatSpec.VERSION403, 6451085fef8d040a6788f2185e7b03ab6b6032f321dKeisuke Kuroyanagi attributeMap); 64631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi 647a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<String> words = new ArrayList<>(); 648a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashSet<Pair<String, String>> bigrams = new HashSet<>(); 64931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 65031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi 65131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi BinaryDictionary binaryDictionary; 65231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi for (int i = 0; i < flashWithGCIterationCount; i++) { 653ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi binaryDictionary = getBinaryDictionary(dictFile); 65431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi for (int j = 0; j < unigramCountPerIteration; j++) { 65531097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final String word = CodePointUtils.generateWord(random, codePointSet); 65631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi words.add(word); 65731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final int unigramProbability = random.nextInt(0xFF); 6582fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addUnigramWord(binaryDictionary, word, unigramProbability); 65931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 66031097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi for (int j = 0; j < bigramCountPerIteration; j++) { 66131097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final String word0 = words.get(random.nextInt(words.size())); 66231097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final String word1 = words.get(random.nextInt(words.size())); 663bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi if (TextUtils.equals(word0, word1)) { 664bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi continue; 665bcb196e3b5b329f3465beba18ac37b1940259e03Keisuke Kuroyanagi } 666a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka bigrams.add(new Pair<>(word0, word1)); 66731097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi final int bigramProbability = random.nextInt(0xF); 6682fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa addBigramWords(binaryDictionary, word0, word1, bigramProbability); 66931097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 670a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka assertEquals(new HashSet<>(words).size(), Integer.parseInt( 671743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi binaryDictionary.getPropertyForGettingStats( 672743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi BinaryDictionary.UNIGRAM_COUNT_QUERY))); 673a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( 674743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi binaryDictionary.getPropertyForGettingStats( 675743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi BinaryDictionary.BIGRAM_COUNT_QUERY))); 67631097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi binaryDictionary.flushWithGC(); 677a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka assertEquals(new HashSet<>(words).size(), Integer.parseInt( 678743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi binaryDictionary.getPropertyForGettingStats( 679743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi BinaryDictionary.UNIGRAM_COUNT_QUERY))); 680a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( 681743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi binaryDictionary.getPropertyForGettingStats( 682743a9b4499c9b53ffedc63f76137ce2eaa3301d0Keisuke Kuroyanagi BinaryDictionary.BIGRAM_COUNT_QUERY))); 68331097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi binaryDictionary.close(); 68431097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 68531097a57cc6f8022abc0ea56f27147399f41b630Keisuke Kuroyanagi } 6862fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 68794d9a2309fbca6b1e42b6c57b9c9509182fe8a0bKeisuke Kuroyanagi public void testGetWordProperties() { 6882fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa final long seed = System.currentTimeMillis(); 6892fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa final Random random = new Random(seed); 6909221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi final int UNIGRAM_COUNT = 1000; 6919221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi final int BIGRAM_COUNT = 1000; 6922fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa final int codePointSetSize = 20; 6932fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 694c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final File dictFile = createEmptyDictionaryAndGetFile(FormatSpec.VERSION403); 695ef903a81acf90bedb56027bde1d2ea1267f02218Keisuke Kuroyanagi final BinaryDictionary binaryDictionary = getBinaryDictionary(dictFile); 6962fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 69788fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", 69888fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi false /* isBeginningOfSentence */); 69994d9a2309fbca6b1e42b6c57b9c9509182fe8a0bKeisuke Kuroyanagi assertFalse(invalidWordProperty.isValid()); 7002fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 701a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<String> words = new ArrayList<>(); 702a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<String, Integer> wordProbabilities = new HashMap<>(); 703a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); 704a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 7059221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi 7069221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi for (int i = 0; i < UNIGRAM_COUNT; i++) { 7072fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa final String word = CodePointUtils.generateWord(random, codePointSet); 7082fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa final int unigramProbability = random.nextInt(0xFF); 7092fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa final boolean isNotAWord = random.nextBoolean(); 71005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu final boolean isPossiblyOffensive = random.nextBoolean(); 7112fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa // TODO: Add tests for historical info. 712e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi binaryDictionary.addUnigramEntry(word, unigramProbability, 71305172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu false /* isBeginningOfSentence */, isNotAWord, isPossiblyOffensive, 7141adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi BinaryDictionary.NOT_A_VALID_TIMESTAMP); 7159221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 7169221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi binaryDictionary.flushWithGC(); 7179221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi } 7189221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi words.add(word); 7199221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi wordProbabilities.put(word, unigramProbability); 72088fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi final WordProperty wordProperty = binaryDictionary.getWordProperty(word, 72188fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi false /* isBeginningOfSentence */); 7225f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi assertEquals(word, wordProperty.mWord); 7235f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi assertTrue(wordProperty.isValid()); 7245f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi assertEquals(isNotAWord, wordProperty.mIsNotAWord); 72505172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu assertEquals(isPossiblyOffensive, wordProperty.mIsPossiblyOffensive); 726c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi assertEquals(false, wordProperty.mHasNgrams); 7275f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); 7289221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi } 7299221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi 7309221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi for (int i = 0; i < BIGRAM_COUNT; i++) { 7319221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi final int word0Index = random.nextInt(wordProbabilities.size()); 7329221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi final int word1Index = random.nextInt(wordProbabilities.size()); 7339221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi if (word0Index == word1Index) { 7349221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi continue; 7359221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi } 7369221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi final String word0 = words.get(word0Index); 7379221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi final String word1 = words.get(word1Index); 73864341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int unigramProbability = wordProbabilities.get(word1); 73964341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int bigramProbability = 74064341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi unigramProbability + random.nextInt(0xFF - unigramProbability); 741e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi addBigramWords(binaryDictionary, word0, word1, bigramProbability); 7429221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 7439221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi binaryDictionary.flushWithGC(); 7449221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi } 7459221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi if (!bigrams.containsKey(word0)) { 746a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashSet<String> bigramWord1s = new HashSet<>(); 7479221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi bigrams.put(word0, bigramWord1s); 7489221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi } 7499221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi bigrams.get(word0).add(word1); 750a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability); 7519221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi } 7529221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi 7539221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi for (int i = 0; i < words.size(); i++) { 7549221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi final String word0 = words.get(i); 7559221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi if (!bigrams.containsKey(word0)) { 7569221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi continue; 7579221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi } 7589221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi final HashSet<String> bigramWord1s = bigrams.get(word0); 75988fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, 76088fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi false /* isBeginningOfSentence */); 761c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size()); 762c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi // TODO: Support ngram. 763c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi for (final WeightedString bigramTarget : wordProperty.getBigrams()) { 764c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi final String word1 = bigramTarget.mWord; 7659221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi assertTrue(bigramWord1s.contains(word1)); 766ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); 767ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, bigramTarget.getProbability()); 7689221772ab7f112f6ef9136a69d0502befbdc544eKeisuke Kuroyanagi } 7692fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa } 7702fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa } 7712fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 772941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi public void testIterateAllWords() { 773941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final long seed = System.currentTimeMillis(); 774941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final Random random = new Random(seed); 775941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final int UNIGRAM_COUNT = 1000; 776941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final int BIGRAM_COUNT = 1000; 777941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final int codePointSetSize = 20; 778941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 779c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); 780941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi 78188fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", 78288fa47a27d45f6460971d0d223aa558e121b3478Keisuke Kuroyanagi false /* isBeginningOfSentence */); 783941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi assertFalse(invalidWordProperty.isValid()); 784941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi 785a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<String> words = new ArrayList<>(); 786a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>(); 787a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); 788941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = 789a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka new HashMap<>(); 790941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi 791941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi for (int i = 0; i < UNIGRAM_COUNT; i++) { 792941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final String word = CodePointUtils.generateWord(random, codePointSet); 793941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final int unigramProbability = random.nextInt(0xFF); 794941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi addUnigramWord(binaryDictionary, word, unigramProbability); 795941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 796941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi binaryDictionary.flushWithGC(); 797941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi } 798941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi words.add(word); 799941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi wordProbabilitiesToCheckLater.put(word, unigramProbability); 800941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi } 801941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi 802941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi for (int i = 0; i < BIGRAM_COUNT; i++) { 803941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size()); 804941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size()); 805941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi if (word0Index == word1Index) { 806941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi continue; 807941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi } 808941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final String word0 = words.get(word0Index); 809941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final String word1 = words.get(word1Index); 81064341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int unigramProbability = wordProbabilitiesToCheckLater.get(word1); 81164341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi final int bigramProbability = 81264341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi unigramProbability + random.nextInt(0xFF - unigramProbability); 813e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagi addBigramWords(binaryDictionary, word0, word1, bigramProbability); 814941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 815941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi binaryDictionary.flushWithGC(); 816941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi } 817941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi if (!bigrams.containsKey(word0)) { 818a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashSet<String> bigramWord1s = new HashSet<>(); 819941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi bigrams.put(word0, bigramWord1s); 820941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi } 821941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi bigrams.get(word0).add(word1); 822a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability); 823941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi } 824941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi 825a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final HashSet<String> wordSet = new HashSet<>(words); 826941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final HashSet<Pair<String, String>> bigramSet = 827a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka new HashSet<>(bigramProbabilitiesToCheckLater.keySet()); 828941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi int token = 0; 829941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi do { 830941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final BinaryDictionary.GetNextWordPropertyResult result = 831941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi binaryDictionary.getNextWordProperty(token); 832941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final WordProperty wordProperty = result.mWordProperty; 8335f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi final String word0 = wordProperty.mWord; 834941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi assertEquals((int)wordProbabilitiesToCheckLater.get(word0), 835941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi wordProperty.mProbabilityInfo.mProbability); 836941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi wordSet.remove(word0); 837941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi final HashSet<String> bigramWord1s = bigrams.get(word0); 838c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi // TODO: Support ngram. 8392522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi if (wordProperty.mHasNgrams) { 8402522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi for (final WeightedString bigramTarget : wordProperty.getBigrams()) { 8412522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi final String word1 = bigramTarget.mWord; 8422522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi assertTrue(bigramWord1s.contains(word1)); 8432522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi final Pair<String, String> bigram = new Pair<>(word0, word1); 844ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); 845ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi assertEquals(bigramProbability, bigramTarget.getProbability()); 8462522a4a477d366a1e69c07c0a5b7c8a0f43118f6Keisuke Kuroyanagi bigramSet.remove(bigram); 84764341927d2359fe98928471fa2daa4db667144a8Keisuke Kuroyanagi } 848941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi } 849941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi token = result.mNextToken; 850941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi } while (token != 0); 851941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi assertTrue(wordSet.isEmpty()); 852941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi assertTrue(bigramSet.isEmpty()); 853941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi } 854941734695b9eeb59135db737e4b153c45e88247aKeisuke Kuroyanagi 85505172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu public void testPossiblyOffensiveAttributeMaintained() { 85605172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu final BinaryDictionary binaryDictionary = 857ea468cc9de468b6574f98b3a7614decfcb7e456eKeisuke Kuroyanagi getEmptyBinaryDictionary(FormatSpec.VERSION403); 85812d80ebead6a1d7f704a5a3af3b6fe3313ceab05Dan Zivkovic binaryDictionary.addUnigramEntry("ddd", 100, false, true, true, 0); 85905172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu WordProperty wordProperty = binaryDictionary.getWordProperty("ddd", false); 86005172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu assertEquals(true, wordProperty.mIsPossiblyOffensive); 86105172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu } 8625f00fe09e9a611b647592188316e5999465df4d3Tadashi G. Takaoka 8631adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi public void testBeginningOfSentence() { 864c15bbb52a37be751fed2ba7e765dfd7727306308Dan Zivkovic final BinaryDictionary binaryDictionary = getEmptyBinaryDictionary(FormatSpec.VERSION403); 8651adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi final int dummyProbability = 0; 866bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi final NgramContext beginningOfSentenceContext = NgramContext.BEGINNING_OF_SENTENCE; 8671adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi final int bigramProbability = 200; 8681adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi addUnigramWord(binaryDictionary, "aaa", dummyProbability); 869bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability, 8701adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 8711adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi assertEquals(bigramProbability, 872bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa")); 873bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi binaryDictionary.addNgramEntry(beginningOfSentenceContext, "aaa", bigramProbability, 8741adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 8751adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi addUnigramWord(binaryDictionary, "bbb", dummyProbability); 876bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi binaryDictionary.addNgramEntry(beginningOfSentenceContext, "bbb", bigramProbability, 8771adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 8781adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi binaryDictionary.flushWithGC(); 8791adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi assertEquals(bigramProbability, 880bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi binaryDictionary.getNgramProbability(beginningOfSentenceContext, "aaa")); 8811adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi assertEquals(bigramProbability, 882bb0eca57054758ef17b032d2654c1fc5f6b32101Keisuke Kuroyanagi binaryDictionary.getNgramProbability(beginningOfSentenceContext, "bbb")); 8831adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi } 8844be103df0e885ff35220629ace395d9d2e7c4a3fKeisuke Kuroyanagi} 885