BinaryDictionaryTests.java revision 88fa47a27d45f6460971d0d223aa558e121b3478
1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.test.AndroidTestCase; 20import android.test.suitebuilder.annotation.LargeTest; 21import android.text.TextUtils; 22import android.util.Pair; 23 24import com.android.inputmethod.latin.makedict.CodePointUtils; 25import com.android.inputmethod.latin.makedict.FormatSpec; 26import com.android.inputmethod.latin.makedict.WeightedString; 27import com.android.inputmethod.latin.makedict.WordProperty; 28import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; 29import com.android.inputmethod.latin.utils.FileUtils; 30import com.android.inputmethod.latin.utils.LanguageModelParam; 31 32import java.io.File; 33import java.io.IOException; 34import java.util.ArrayList; 35import java.util.HashMap; 36import java.util.HashSet; 37import java.util.Locale; 38import java.util.Map; 39import java.util.Random; 40 41// TODO Use the seed passed as an argument for makedict test. 42@LargeTest 43public class BinaryDictionaryTests extends AndroidTestCase { 44 private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 45 private static final String TEST_LOCALE = "test"; 46 private static final int[] DICT_FORMAT_VERSIONS = 47 new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV }; 48 49 private static boolean canCheckBigramProbability(final int formatVersion) { 50 return formatVersion > FormatSpec.VERSION401; 51 } 52 53 private static boolean supportsBeginningOfSentence(final int formatVersion) { 54 return formatVersion > FormatSpec.VERSION401; 55 } 56 57 private File createEmptyDictionaryAndGetFile(final String dictId, 58 final int formatVersion) throws IOException { 59 if (formatVersion == FormatSpec.VERSION4 60 || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING 61 || formatVersion == FormatSpec.VERSION4_DEV) { 62 return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion); 63 } else { 64 throw new IOException("Dictionary format version " + formatVersion 65 + " is not supported."); 66 } 67 } 68 69 private File createEmptyVer4DictionaryAndGetFile(final String dictId, 70 final int formatVersion) throws IOException { 71 final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, 72 getContext().getCacheDir()); 73 file.delete(); 74 file.mkdir(); 75 Map<String, String> attributeMap = new HashMap<>(); 76 if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, 77 Locale.ENGLISH, attributeMap)) { 78 return file; 79 } else { 80 throw new IOException("Empty dictionary " + file.getAbsolutePath() 81 + " cannot be created. Format version: " + formatVersion); 82 } 83 } 84 85 public void testIsValidDictionary() { 86 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 87 testIsValidDictionary(formatVersion); 88 } 89 } 90 91 private void testIsValidDictionary(final int formatVersion) { 92 File dictFile = null; 93 try { 94 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 95 } catch (IOException e) { 96 fail("IOException while writing an initial dictionary : " + e); 97 } 98 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 99 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 100 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 101 assertTrue("binaryDictionary must be valid for existing valid dictionary file.", 102 binaryDictionary.isValidDictionary()); 103 binaryDictionary.close(); 104 assertFalse("binaryDictionary must be invalid after closing.", 105 binaryDictionary.isValidDictionary()); 106 FileUtils.deleteRecursively(dictFile); 107 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, 108 dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), 109 TEST_LOCALE, true /* isUpdatable */); 110 assertFalse("binaryDictionary must be invalid for not existing dictionary file.", 111 binaryDictionary.isValidDictionary()); 112 binaryDictionary.close(); 113 } 114 115 public void testConstructingDictionaryOnMemory() { 116 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 117 testConstructingDictionaryOnMemory(formatVersion); 118 } 119 } 120 121 private void testConstructingDictionaryOnMemory(final int formatVersion) { 122 File dictFile = null; 123 try { 124 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 125 } catch (IOException e) { 126 fail("IOException while writing an initial dictionary : " + e); 127 } 128 FileUtils.deleteRecursively(dictFile); 129 assertFalse(dictFile.exists()); 130 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 131 true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion, 132 new HashMap<String, String>()); 133 assertTrue(binaryDictionary.isValidDictionary()); 134 assertEquals(formatVersion, binaryDictionary.getFormatVersion()); 135 final int probability = 100; 136 addUnigramWord(binaryDictionary, "word", probability); 137 assertEquals(probability, binaryDictionary.getFrequency("word")); 138 assertFalse(dictFile.exists()); 139 binaryDictionary.flush(); 140 assertTrue(dictFile.exists()); 141 assertTrue(binaryDictionary.isValidDictionary()); 142 assertEquals(formatVersion, binaryDictionary.getFormatVersion()); 143 assertEquals(probability, binaryDictionary.getFrequency("word")); 144 binaryDictionary.close(); 145 dictFile.delete(); 146 } 147 148 public void testAddTooLongWord() { 149 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 150 testAddTooLongWord(formatVersion); 151 } 152 } 153 154 private void testAddTooLongWord(final int formatVersion) { 155 File dictFile = null; 156 try { 157 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 158 } catch (IOException e) { 159 fail("IOException while writing an initial dictionary : " + e); 160 } 161 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 162 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 163 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 164 165 final StringBuffer stringBuilder = new StringBuffer(); 166 for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) { 167 stringBuilder.append('a'); 168 } 169 final String validLongWord = stringBuilder.toString(); 170 stringBuilder.append('a'); 171 final String invalidLongWord = stringBuilder.toString(); 172 final int probability = 100; 173 addUnigramWord(binaryDictionary, "aaa", probability); 174 addUnigramWord(binaryDictionary, validLongWord, probability); 175 addUnigramWord(binaryDictionary, invalidLongWord, probability); 176 // Too long short cut. 177 binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, 178 10 /* shortcutProbability */, false /* isBeginningOfSentence */, 179 false /* isNotAWord */, false /* isBlacklisted */, 180 BinaryDictionary.NOT_A_VALID_TIMESTAMP); 181 addUnigramWord(binaryDictionary, "abc", probability); 182 final int updatedProbability = 200; 183 // Update. 184 addUnigramWord(binaryDictionary, validLongWord, updatedProbability); 185 addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability); 186 addUnigramWord(binaryDictionary, "abc", updatedProbability); 187 188 assertEquals(probability, binaryDictionary.getFrequency("aaa")); 189 assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord)); 190 assertEquals(BinaryDictionary.NOT_A_PROBABILITY, 191 binaryDictionary.getFrequency(invalidLongWord)); 192 assertEquals(updatedProbability, binaryDictionary.getFrequency("abc")); 193 dictFile.delete(); 194 } 195 196 private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, 197 final int probability) { 198 binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, 199 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, 200 false /* isBeginningOfSentence */, false /* isNotAWord */, 201 false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 202 } 203 204 private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, 205 final String word1, final int probability) { 206 binaryDictionary.addNgramEntry(new PrevWordsInfo(word0), word1, probability, 207 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 208 } 209 210 private static boolean isValidBigram(final BinaryDictionary binaryDictionary, 211 final String word0, final String word1) { 212 return binaryDictionary.isValidNgram(new PrevWordsInfo(word0), word1); 213 } 214 215 private static void removeBigramEntry(final BinaryDictionary binaryDictionary, 216 final String word0, final String word1) { 217 binaryDictionary.removeNgramEntry(new PrevWordsInfo(word0), word1); 218 } 219 220 private static int getBigramProbability(final BinaryDictionary binaryDictionary, 221 final String word0, final String word1) { 222 return binaryDictionary.getNgramProbability(new PrevWordsInfo(word0), word1); 223 } 224 225 public void testAddUnigramWord() { 226 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 227 testAddUnigramWord(formatVersion); 228 } 229 } 230 231 private void testAddUnigramWord(final int formatVersion) { 232 File dictFile = null; 233 try { 234 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 235 } catch (IOException e) { 236 fail("IOException while writing an initial dictionary : " + e); 237 } 238 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 239 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 240 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 241 242 final int probability = 100; 243 addUnigramWord(binaryDictionary, "aaa", probability); 244 // Reallocate and create. 245 addUnigramWord(binaryDictionary, "aab", probability); 246 // Insert into children. 247 addUnigramWord(binaryDictionary, "aac", probability); 248 // Make terminal. 249 addUnigramWord(binaryDictionary, "aa", probability); 250 // Create children. 251 addUnigramWord(binaryDictionary, "aaaa", probability); 252 // Reallocate and make termianl. 253 addUnigramWord(binaryDictionary, "a", probability); 254 255 final int updatedProbability = 200; 256 // Update. 257 addUnigramWord(binaryDictionary, "aaa", updatedProbability); 258 259 assertEquals(probability, binaryDictionary.getFrequency("aab")); 260 assertEquals(probability, binaryDictionary.getFrequency("aac")); 261 assertEquals(probability, binaryDictionary.getFrequency("aa")); 262 assertEquals(probability, binaryDictionary.getFrequency("aaaa")); 263 assertEquals(probability, binaryDictionary.getFrequency("a")); 264 assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa")); 265 266 dictFile.delete(); 267 } 268 269 public void testRandomlyAddUnigramWord() { 270 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 271 testRandomlyAddUnigramWord(formatVersion); 272 } 273 } 274 275 private void testRandomlyAddUnigramWord(final int formatVersion) { 276 final int wordCount = 1000; 277 final int codePointSetSize = 50; 278 final long seed = System.currentTimeMillis(); 279 280 File dictFile = null; 281 try { 282 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 283 } catch (IOException e) { 284 fail("IOException while writing an initial dictionary : " + e); 285 } 286 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 287 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 288 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 289 290 final HashMap<String, Integer> probabilityMap = new HashMap<>(); 291 // Test a word that isn't contained within the dictionary. 292 final Random random = new Random(seed); 293 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 294 for (int i = 0; i < wordCount; ++i) { 295 final String word = CodePointUtils.generateWord(random, codePointSet); 296 probabilityMap.put(word, random.nextInt(0xFF)); 297 } 298 for (String word : probabilityMap.keySet()) { 299 addUnigramWord(binaryDictionary, word, probabilityMap.get(word)); 300 } 301 for (String word : probabilityMap.keySet()) { 302 assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); 303 } 304 dictFile.delete(); 305 } 306 307 public void testAddBigramWords() { 308 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 309 testAddBigramWords(formatVersion); 310 } 311 } 312 313 private void testAddBigramWords(final int formatVersion) { 314 File dictFile = null; 315 try { 316 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 317 } catch (IOException e) { 318 fail("IOException while writing an initial dictionary : " + e); 319 } 320 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 321 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 322 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 323 324 final int unigramProbability = 100; 325 final int bigramProbability = 150; 326 final int updatedBigramProbability = 200; 327 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 328 addUnigramWord(binaryDictionary, "abb", unigramProbability); 329 addUnigramWord(binaryDictionary, "bcc", unigramProbability); 330 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 331 addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 332 addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 333 addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 334 335 assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 336 assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); 337 assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); 338 assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); 339 if (canCheckBigramProbability(formatVersion)) { 340 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); 341 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); 342 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); 343 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); 344 } 345 346 addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); 347 if (canCheckBigramProbability(formatVersion)) { 348 assertEquals(updatedBigramProbability, 349 getBigramProbability(binaryDictionary, "aaa", "abb")); 350 } 351 352 assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); 353 assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); 354 assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); 355 assertEquals(Dictionary.NOT_A_PROBABILITY, 356 getBigramProbability(binaryDictionary, "bcc", "aaa")); 357 assertEquals(Dictionary.NOT_A_PROBABILITY, 358 getBigramProbability(binaryDictionary, "bcc", "bbc")); 359 assertEquals(Dictionary.NOT_A_PROBABILITY, 360 getBigramProbability(binaryDictionary, "aaa", "aaa")); 361 362 // Testing bigram link. 363 addUnigramWord(binaryDictionary, "abcde", unigramProbability); 364 addUnigramWord(binaryDictionary, "fghij", unigramProbability); 365 addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability); 366 addUnigramWord(binaryDictionary, "fgh", unigramProbability); 367 addUnigramWord(binaryDictionary, "abc", unigramProbability); 368 addUnigramWord(binaryDictionary, "f", unigramProbability); 369 370 if (canCheckBigramProbability(formatVersion)) { 371 assertEquals(bigramProbability, 372 getBigramProbability(binaryDictionary, "abcde", "fghij")); 373 } 374 assertEquals(Dictionary.NOT_A_PROBABILITY, 375 getBigramProbability(binaryDictionary, "abcde", "fgh")); 376 addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); 377 if (canCheckBigramProbability(formatVersion)) { 378 assertEquals(updatedBigramProbability, 379 getBigramProbability(binaryDictionary, "abcde", "fghij")); 380 } 381 382 dictFile.delete(); 383 } 384 385 public void testRandomlyAddBigramWords() { 386 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 387 testRandomlyAddBigramWords(formatVersion); 388 } 389 } 390 391 private void testRandomlyAddBigramWords(final int formatVersion) { 392 final int wordCount = 100; 393 final int bigramCount = 1000; 394 final int codePointSetSize = 50; 395 final long seed = System.currentTimeMillis(); 396 final Random random = new Random(seed); 397 398 File dictFile = null; 399 try { 400 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 401 } catch (IOException e) { 402 fail("IOException while writing an initial dictionary : " + e); 403 } 404 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 405 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 406 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 407 408 final ArrayList<String> words = new ArrayList<>(); 409 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 410 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 411 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 412 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 413 414 for (int i = 0; i < wordCount; ++i) { 415 final String word = CodePointUtils.generateWord(random, codePointSet); 416 words.add(word); 417 final int unigramProbability = random.nextInt(0xFF); 418 unigramProbabilities.put(word, unigramProbability); 419 addUnigramWord(binaryDictionary, word, unigramProbability); 420 } 421 422 for (int i = 0; i < bigramCount; i++) { 423 final String word0 = words.get(random.nextInt(wordCount)); 424 final String word1 = words.get(random.nextInt(wordCount)); 425 if (TextUtils.equals(word0, word1)) { 426 continue; 427 } 428 final Pair<String, String> bigram = new Pair<>(word0, word1); 429 bigramWords.add(bigram); 430 final int unigramProbability = unigramProbabilities.get(word1); 431 final int bigramProbability = 432 unigramProbability + random.nextInt(0xFF - unigramProbability); 433 bigramProbabilities.put(bigram, bigramProbability); 434 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 435 } 436 437 for (final Pair<String, String> bigram : bigramWords) { 438 final int bigramProbability = bigramProbabilities.get(bigram); 439 assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 440 isValidBigram(binaryDictionary, bigram.first, bigram.second)); 441 if (canCheckBigramProbability(formatVersion)) { 442 assertEquals(bigramProbability, 443 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 444 } 445 } 446 447 dictFile.delete(); 448 } 449 450 public void testRemoveBigramWords() { 451 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 452 testRemoveBigramWords(formatVersion); 453 } 454 } 455 456 private void testRemoveBigramWords(final int formatVersion) { 457 File dictFile = null; 458 try { 459 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 460 } catch (IOException e) { 461 fail("IOException while writing an initial dictionary : " + e); 462 } 463 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 464 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 465 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 466 final int unigramProbability = 100; 467 final int bigramProbability = 150; 468 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 469 addUnigramWord(binaryDictionary, "abb", unigramProbability); 470 addUnigramWord(binaryDictionary, "bcc", unigramProbability); 471 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 472 addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 473 addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 474 addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 475 476 assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 477 assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); 478 assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); 479 assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); 480 481 removeBigramEntry(binaryDictionary, "aaa", "abb"); 482 assertFalse(isValidBigram(binaryDictionary, "aaa", "abb")); 483 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 484 assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 485 486 487 removeBigramEntry(binaryDictionary, "aaa", "bcc"); 488 assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc")); 489 removeBigramEntry(binaryDictionary, "abb", "aaa"); 490 assertFalse(isValidBigram(binaryDictionary, "abb", "aaa")); 491 removeBigramEntry(binaryDictionary, "abb", "bcc"); 492 assertFalse(isValidBigram(binaryDictionary, "abb", "bcc")); 493 494 removeBigramEntry(binaryDictionary, "aaa", "abb"); 495 // Test remove non-existing bigram operation. 496 removeBigramEntry(binaryDictionary, "aaa", "abb"); 497 removeBigramEntry(binaryDictionary, "bcc", "aaa"); 498 499 dictFile.delete(); 500 } 501 502 public void testFlushDictionary() { 503 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 504 testFlushDictionary(formatVersion); 505 } 506 } 507 508 private void testFlushDictionary(final int formatVersion) { 509 File dictFile = null; 510 try { 511 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 512 } catch (IOException e) { 513 fail("IOException while writing an initial dictionary : " + e); 514 } 515 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 516 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 517 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 518 519 final int probability = 100; 520 addUnigramWord(binaryDictionary, "aaa", probability); 521 addUnigramWord(binaryDictionary, "abcd", probability); 522 // Close without flushing. 523 binaryDictionary.close(); 524 525 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 526 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 527 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 528 529 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); 530 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); 531 532 addUnigramWord(binaryDictionary, "aaa", probability); 533 addUnigramWord(binaryDictionary, "abcd", probability); 534 binaryDictionary.flush(); 535 binaryDictionary.close(); 536 537 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 538 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 539 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 540 541 assertEquals(probability, binaryDictionary.getFrequency("aaa")); 542 assertEquals(probability, binaryDictionary.getFrequency("abcd")); 543 addUnigramWord(binaryDictionary, "bcde", probability); 544 binaryDictionary.flush(); 545 binaryDictionary.close(); 546 547 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 548 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 549 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 550 assertEquals(probability, binaryDictionary.getFrequency("bcde")); 551 binaryDictionary.close(); 552 553 dictFile.delete(); 554 } 555 556 public void testFlushWithGCDictionary() { 557 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 558 testFlushWithGCDictionary(formatVersion); 559 } 560 } 561 562 private void testFlushWithGCDictionary(final int formatVersion) { 563 File dictFile = null; 564 try { 565 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 566 } catch (IOException e) { 567 fail("IOException while writing an initial dictionary : " + e); 568 } 569 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 570 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 571 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 572 573 final int unigramProbability = 100; 574 final int bigramProbability = 150; 575 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 576 addUnigramWord(binaryDictionary, "abb", unigramProbability); 577 addUnigramWord(binaryDictionary, "bcc", unigramProbability); 578 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 579 addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 580 addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 581 addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 582 binaryDictionary.flushWithGC(); 583 binaryDictionary.close(); 584 585 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 586 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 587 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 588 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 589 assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); 590 assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); 591 if (canCheckBigramProbability(formatVersion)) { 592 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); 593 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); 594 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); 595 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); 596 } 597 assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); 598 assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); 599 assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); 600 binaryDictionary.flushWithGC(); 601 binaryDictionary.close(); 602 603 dictFile.delete(); 604 } 605 606 public void testAddBigramWordsAndFlashWithGC() { 607 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 608 testAddBigramWordsAndFlashWithGC(formatVersion); 609 } 610 } 611 612 // TODO: Evaluate performance of GC 613 private void testAddBigramWordsAndFlashWithGC(final int formatVersion) { 614 final int wordCount = 100; 615 final int bigramCount = 1000; 616 final int codePointSetSize = 30; 617 final long seed = System.currentTimeMillis(); 618 final Random random = new Random(seed); 619 620 File dictFile = null; 621 try { 622 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 623 } catch (IOException e) { 624 fail("IOException while writing an initial dictionary : " + e); 625 } 626 627 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 628 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 629 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 630 631 final ArrayList<String> words = new ArrayList<>(); 632 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 633 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 634 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 635 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 636 637 for (int i = 0; i < wordCount; ++i) { 638 final String word = CodePointUtils.generateWord(random, codePointSet); 639 words.add(word); 640 final int unigramProbability = random.nextInt(0xFF); 641 unigramProbabilities.put(word, unigramProbability); 642 addUnigramWord(binaryDictionary, word, unigramProbability); 643 } 644 645 for (int i = 0; i < bigramCount; i++) { 646 final String word0 = words.get(random.nextInt(wordCount)); 647 final String word1 = words.get(random.nextInt(wordCount)); 648 if (TextUtils.equals(word0, word1)) { 649 continue; 650 } 651 final Pair<String, String> bigram = new Pair<>(word0, word1); 652 bigramWords.add(bigram); 653 final int unigramProbability = unigramProbabilities.get(word1); 654 final int bigramProbability = 655 unigramProbability + random.nextInt(0xFF - unigramProbability); 656 bigramProbabilities.put(bigram, bigramProbability); 657 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 658 } 659 660 binaryDictionary.flushWithGC(); 661 binaryDictionary.close(); 662 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 663 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 664 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 665 666 667 for (final Pair<String, String> bigram : bigramWords) { 668 final int bigramProbability = bigramProbabilities.get(bigram); 669 assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 670 isValidBigram(binaryDictionary, bigram.first, bigram.second)); 671 if (canCheckBigramProbability(formatVersion)) { 672 assertEquals(bigramProbability, 673 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 674 } 675 } 676 677 dictFile.delete(); 678 } 679 680 public void testRandomOperationsAndFlashWithGC() { 681 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 682 testRandomOperationsAndFlashWithGC(formatVersion); 683 } 684 } 685 686 private void testRandomOperationsAndFlashWithGC(final int formatVersion) { 687 final int flashWithGCIterationCount = 50; 688 final int operationCountInEachIteration = 200; 689 final int initialUnigramCount = 100; 690 final float addUnigramProb = 0.5f; 691 final float addBigramProb = 0.8f; 692 final float removeBigramProb = 0.2f; 693 final int codePointSetSize = 30; 694 695 final long seed = System.currentTimeMillis(); 696 final Random random = new Random(seed); 697 698 File dictFile = null; 699 try { 700 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 701 } catch (IOException e) { 702 fail("IOException while writing an initial dictionary : " + e); 703 } 704 705 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 706 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 707 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 708 final ArrayList<String> words = new ArrayList<>(); 709 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 710 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 711 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 712 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 713 for (int i = 0; i < initialUnigramCount; ++i) { 714 final String word = CodePointUtils.generateWord(random, codePointSet); 715 words.add(word); 716 final int unigramProbability = random.nextInt(0xFF); 717 unigramProbabilities.put(word, unigramProbability); 718 addUnigramWord(binaryDictionary, word, unigramProbability); 719 } 720 binaryDictionary.flushWithGC(); 721 binaryDictionary.close(); 722 723 for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { 724 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 725 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 726 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 727 for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { 728 // Add unigram. 729 if (random.nextFloat() < addUnigramProb) { 730 final String word = CodePointUtils.generateWord(random, codePointSet); 731 words.add(word); 732 final int unigramProbability = random.nextInt(0xFF); 733 unigramProbabilities.put(word, unigramProbability); 734 addUnigramWord(binaryDictionary, word, unigramProbability); 735 } 736 // Add bigram. 737 if (random.nextFloat() < addBigramProb && words.size() > 2) { 738 final int word0Index = random.nextInt(words.size()); 739 int word1Index = random.nextInt(words.size() - 1); 740 if (word0Index <= word1Index) { 741 word1Index++; 742 } 743 final String word0 = words.get(word0Index); 744 final String word1 = words.get(word1Index); 745 if (TextUtils.equals(word0, word1)) { 746 continue; 747 } 748 final int unigramProbability = unigramProbabilities.get(word1); 749 final int bigramProbability = 750 unigramProbability + random.nextInt(0xFF - unigramProbability); 751 final Pair<String, String> bigram = new Pair<>(word0, word1); 752 bigramWords.add(bigram); 753 bigramProbabilities.put(bigram, bigramProbability); 754 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 755 } 756 // Remove bigram. 757 if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { 758 final int bigramIndex = random.nextInt(bigramWords.size()); 759 final Pair<String, String> bigram = bigramWords.get(bigramIndex); 760 bigramWords.remove(bigramIndex); 761 bigramProbabilities.remove(bigram); 762 removeBigramEntry(binaryDictionary, bigram.first, bigram.second); 763 } 764 } 765 766 // Test whether the all unigram operations are collectlly handled. 767 for (int i = 0; i < words.size(); i++) { 768 final String word = words.get(i); 769 final int unigramProbability = unigramProbabilities.get(word); 770 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 771 } 772 // Test whether the all bigram operations are collectlly handled. 773 for (int i = 0; i < bigramWords.size(); i++) { 774 final Pair<String, String> bigram = bigramWords.get(i); 775 final int probability; 776 if (bigramProbabilities.containsKey(bigram)) { 777 final int bigramProbability = bigramProbabilities.get(bigram); 778 probability = bigramProbability; 779 } else { 780 probability = Dictionary.NOT_A_PROBABILITY; 781 } 782 783 if (canCheckBigramProbability(formatVersion)) { 784 assertEquals(probability, 785 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 786 } 787 assertEquals(probability != Dictionary.NOT_A_PROBABILITY, 788 isValidBigram(binaryDictionary, bigram.first, bigram.second)); 789 } 790 binaryDictionary.flushWithGC(); 791 binaryDictionary.close(); 792 } 793 794 dictFile.delete(); 795 } 796 797 public void testAddManyUnigramsAndFlushWithGC() { 798 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 799 testAddManyUnigramsAndFlushWithGC(formatVersion); 800 } 801 } 802 803 private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) { 804 final int flashWithGCIterationCount = 3; 805 final int codePointSetSize = 50; 806 807 final long seed = System.currentTimeMillis(); 808 final Random random = new Random(seed); 809 810 File dictFile = null; 811 try { 812 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 813 } catch (IOException e) { 814 fail("IOException while writing an initial dictionary : " + e); 815 } 816 817 final ArrayList<String> words = new ArrayList<>(); 818 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 819 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 820 821 BinaryDictionary binaryDictionary; 822 for (int i = 0; i < flashWithGCIterationCount; i++) { 823 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 824 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 825 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 826 while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 827 final String word = CodePointUtils.generateWord(random, codePointSet); 828 words.add(word); 829 final int unigramProbability = random.nextInt(0xFF); 830 unigramProbabilities.put(word, unigramProbability); 831 addUnigramWord(binaryDictionary, word, unigramProbability); 832 } 833 834 for (int j = 0; j < words.size(); j++) { 835 final String word = words.get(j); 836 final int unigramProbability = unigramProbabilities.get(word); 837 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 838 } 839 840 binaryDictionary.flushWithGC(); 841 binaryDictionary.close(); 842 } 843 844 dictFile.delete(); 845 } 846 847 public void testUnigramAndBigramCount() { 848 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 849 testUnigramAndBigramCount(formatVersion); 850 } 851 } 852 853 private void testUnigramAndBigramCount(final int formatVersion) { 854 final int flashWithGCIterationCount = 10; 855 final int codePointSetSize = 50; 856 final int unigramCountPerIteration = 1000; 857 final int bigramCountPerIteration = 2000; 858 final long seed = System.currentTimeMillis(); 859 final Random random = new Random(seed); 860 861 File dictFile = null; 862 try { 863 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 864 } catch (IOException e) { 865 fail("IOException while writing an initial dictionary : " + e); 866 } 867 868 final ArrayList<String> words = new ArrayList<>(); 869 final HashSet<Pair<String, String>> bigrams = new HashSet<>(); 870 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 871 872 BinaryDictionary binaryDictionary; 873 for (int i = 0; i < flashWithGCIterationCount; i++) { 874 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 875 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 876 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 877 for (int j = 0; j < unigramCountPerIteration; j++) { 878 final String word = CodePointUtils.generateWord(random, codePointSet); 879 words.add(word); 880 final int unigramProbability = random.nextInt(0xFF); 881 addUnigramWord(binaryDictionary, word, unigramProbability); 882 } 883 for (int j = 0; j < bigramCountPerIteration; j++) { 884 final String word0 = words.get(random.nextInt(words.size())); 885 final String word1 = words.get(random.nextInt(words.size())); 886 if (TextUtils.equals(word0, word1)) { 887 continue; 888 } 889 bigrams.add(new Pair<>(word0, word1)); 890 final int bigramProbability = random.nextInt(0xF); 891 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 892 } 893 assertEquals(new HashSet<>(words).size(), Integer.parseInt( 894 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 895 assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( 896 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); 897 binaryDictionary.flushWithGC(); 898 assertEquals(new HashSet<>(words).size(), Integer.parseInt( 899 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 900 assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( 901 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); 902 binaryDictionary.close(); 903 } 904 905 dictFile.delete(); 906 } 907 908 public void testAddMultipleDictionaryEntries() { 909 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 910 testAddMultipleDictionaryEntries(formatVersion); 911 } 912 } 913 914 private void testAddMultipleDictionaryEntries(final int formatVersion) { 915 final int codePointSetSize = 20; 916 final int lmParamCount = 1000; 917 final double bigramContinueRate = 0.9; 918 final long seed = System.currentTimeMillis(); 919 final Random random = new Random(seed); 920 921 File dictFile = null; 922 try { 923 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 924 } catch (IOException e) { 925 fail("IOException while writing an initial dictionary : " + e); 926 } 927 928 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 929 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 930 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 931 932 final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount]; 933 String prevWord = null; 934 for (int i = 0; i < languageModelParams.length; i++) { 935 final String word = CodePointUtils.generateWord(random, codePointSet); 936 final int probability = random.nextInt(0xFF); 937 final int bigramProbability = probability + random.nextInt(0xFF - probability); 938 unigramProbabilities.put(word, probability); 939 if (prevWord == null) { 940 languageModelParams[i] = new LanguageModelParam(word, probability, 941 BinaryDictionary.NOT_A_VALID_TIMESTAMP); 942 } else { 943 languageModelParams[i] = new LanguageModelParam(prevWord, word, probability, 944 bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP); 945 bigramProbabilities.put(new Pair<>(prevWord, word), 946 bigramProbability); 947 } 948 prevWord = (random.nextDouble() < bigramContinueRate) ? word : null; 949 } 950 951 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 952 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 953 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 954 binaryDictionary.addMultipleDictionaryEntries(languageModelParams); 955 956 for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) { 957 assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey())); 958 } 959 960 for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { 961 final String word0 = entry.getKey().first; 962 final String word1 = entry.getKey().second; 963 final int bigramProbability = entry.getValue(); 964 assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 965 isValidBigram(binaryDictionary, word0, word1)); 966 if (canCheckBigramProbability(formatVersion)) { 967 assertEquals(bigramProbability, 968 getBigramProbability(binaryDictionary, word0, word1)); 969 } 970 } 971 } 972 973 public void testGetWordProperties() { 974 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 975 testGetWordProperties(formatVersion); 976 } 977 } 978 979 private void testGetWordProperties(final int formatVersion) { 980 final long seed = System.currentTimeMillis(); 981 final Random random = new Random(seed); 982 final int UNIGRAM_COUNT = 1000; 983 final int BIGRAM_COUNT = 1000; 984 final int codePointSetSize = 20; 985 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 986 987 File dictFile = null; 988 try { 989 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 990 } catch (IOException e) { 991 fail("IOException while writing an initial dictionary : " + e); 992 } 993 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 994 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 995 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 996 997 final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", 998 false /* isBeginningOfSentence */); 999 assertFalse(invalidWordProperty.isValid()); 1000 1001 final ArrayList<String> words = new ArrayList<>(); 1002 final HashMap<String, Integer> wordProbabilities = new HashMap<>(); 1003 final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); 1004 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 1005 1006 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1007 final String word = CodePointUtils.generateWord(random, codePointSet); 1008 final int unigramProbability = random.nextInt(0xFF); 1009 final boolean isNotAWord = random.nextBoolean(); 1010 final boolean isBlacklisted = random.nextBoolean(); 1011 // TODO: Add tests for historical info. 1012 binaryDictionary.addUnigramEntry(word, unigramProbability, 1013 null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, 1014 false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, 1015 BinaryDictionary.NOT_A_VALID_TIMESTAMP); 1016 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1017 binaryDictionary.flushWithGC(); 1018 } 1019 words.add(word); 1020 wordProbabilities.put(word, unigramProbability); 1021 final WordProperty wordProperty = binaryDictionary.getWordProperty(word, 1022 false /* isBeginningOfSentence */); 1023 assertEquals(word, wordProperty.mWord); 1024 assertTrue(wordProperty.isValid()); 1025 assertEquals(isNotAWord, wordProperty.mIsNotAWord); 1026 assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); 1027 assertEquals(false, wordProperty.mHasBigrams); 1028 assertEquals(false, wordProperty.mHasShortcuts); 1029 assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); 1030 assertTrue(wordProperty.mShortcutTargets.isEmpty()); 1031 } 1032 1033 for (int i = 0; i < BIGRAM_COUNT; i++) { 1034 final int word0Index = random.nextInt(wordProbabilities.size()); 1035 final int word1Index = random.nextInt(wordProbabilities.size()); 1036 if (word0Index == word1Index) { 1037 continue; 1038 } 1039 final String word0 = words.get(word0Index); 1040 final String word1 = words.get(word1Index); 1041 final int unigramProbability = wordProbabilities.get(word1); 1042 final int bigramProbability = 1043 unigramProbability + random.nextInt(0xFF - unigramProbability); 1044 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 1045 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1046 binaryDictionary.flushWithGC(); 1047 } 1048 if (!bigrams.containsKey(word0)) { 1049 final HashSet<String> bigramWord1s = new HashSet<>(); 1050 bigrams.put(word0, bigramWord1s); 1051 } 1052 bigrams.get(word0).add(word1); 1053 bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability); 1054 } 1055 1056 for (int i = 0; i < words.size(); i++) { 1057 final String word0 = words.get(i); 1058 if (!bigrams.containsKey(word0)) { 1059 continue; 1060 } 1061 final HashSet<String> bigramWord1s = bigrams.get(word0); 1062 final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, 1063 false /* isBeginningOfSentence */); 1064 assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); 1065 for (int j = 0; j < wordProperty.mBigrams.size(); j++) { 1066 final String word1 = wordProperty.mBigrams.get(j).mWord; 1067 assertTrue(bigramWord1s.contains(word1)); 1068 if (canCheckBigramProbability(formatVersion)) { 1069 final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); 1070 assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); 1071 } 1072 } 1073 } 1074 } 1075 1076 public void testIterateAllWords() { 1077 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1078 testIterateAllWords(formatVersion); 1079 } 1080 } 1081 1082 private void testIterateAllWords(final int formatVersion) { 1083 final long seed = System.currentTimeMillis(); 1084 final Random random = new Random(seed); 1085 final int UNIGRAM_COUNT = 1000; 1086 final int BIGRAM_COUNT = 1000; 1087 final int codePointSetSize = 20; 1088 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 1089 1090 File dictFile = null; 1091 try { 1092 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1093 } catch (IOException e) { 1094 fail("IOException while writing an initial dictionary : " + e); 1095 } 1096 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1097 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1098 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1099 1100 final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord", 1101 false /* isBeginningOfSentence */); 1102 assertFalse(invalidWordProperty.isValid()); 1103 1104 final ArrayList<String> words = new ArrayList<>(); 1105 final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>(); 1106 final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); 1107 final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = 1108 new HashMap<>(); 1109 1110 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1111 final String word = CodePointUtils.generateWord(random, codePointSet); 1112 final int unigramProbability = random.nextInt(0xFF); 1113 addUnigramWord(binaryDictionary, word, unigramProbability); 1114 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1115 binaryDictionary.flushWithGC(); 1116 } 1117 words.add(word); 1118 wordProbabilitiesToCheckLater.put(word, unigramProbability); 1119 } 1120 1121 for (int i = 0; i < BIGRAM_COUNT; i++) { 1122 final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size()); 1123 final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size()); 1124 if (word0Index == word1Index) { 1125 continue; 1126 } 1127 final String word0 = words.get(word0Index); 1128 final String word1 = words.get(word1Index); 1129 final int unigramProbability = wordProbabilitiesToCheckLater.get(word1); 1130 final int bigramProbability = 1131 unigramProbability + random.nextInt(0xFF - unigramProbability); 1132 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 1133 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1134 binaryDictionary.flushWithGC(); 1135 } 1136 if (!bigrams.containsKey(word0)) { 1137 final HashSet<String> bigramWord1s = new HashSet<>(); 1138 bigrams.put(word0, bigramWord1s); 1139 } 1140 bigrams.get(word0).add(word1); 1141 bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability); 1142 } 1143 1144 final HashSet<String> wordSet = new HashSet<>(words); 1145 final HashSet<Pair<String, String>> bigramSet = 1146 new HashSet<>(bigramProbabilitiesToCheckLater.keySet()); 1147 int token = 0; 1148 do { 1149 final BinaryDictionary.GetNextWordPropertyResult result = 1150 binaryDictionary.getNextWordProperty(token); 1151 final WordProperty wordProperty = result.mWordProperty; 1152 final String word0 = wordProperty.mWord; 1153 assertEquals((int)wordProbabilitiesToCheckLater.get(word0), 1154 wordProperty.mProbabilityInfo.mProbability); 1155 wordSet.remove(word0); 1156 final HashSet<String> bigramWord1s = bigrams.get(word0); 1157 for (int j = 0; j < wordProperty.mBigrams.size(); j++) { 1158 final String word1 = wordProperty.mBigrams.get(j).mWord; 1159 assertTrue(bigramWord1s.contains(word1)); 1160 final Pair<String, String> bigram = new Pair<>(word0, word1); 1161 if (canCheckBigramProbability(formatVersion)) { 1162 final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); 1163 assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); 1164 } 1165 bigramSet.remove(bigram); 1166 } 1167 token = result.mNextToken; 1168 } while (token != 0); 1169 assertTrue(wordSet.isEmpty()); 1170 assertTrue(bigramSet.isEmpty()); 1171 } 1172 1173 public void testAddShortcuts() { 1174 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1175 testAddShortcuts(formatVersion); 1176 } 1177 } 1178 1179 private void testAddShortcuts(final int formatVersion) { 1180 File dictFile = null; 1181 try { 1182 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1183 } catch (IOException e) { 1184 fail("IOException while writing an initial dictionary : " + e); 1185 } 1186 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1187 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1188 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1189 1190 final int unigramProbability = 100; 1191 final int shortcutProbability = 10; 1192 binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", 1193 shortcutProbability, false /* isBeginningOfSentence */, 1194 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); 1195 WordProperty wordProperty = binaryDictionary.getWordProperty("aaa", 1196 false /* isBeginningOfSentence */); 1197 assertEquals(1, wordProperty.mShortcutTargets.size()); 1198 assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); 1199 assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); 1200 final int updatedShortcutProbability = 2; 1201 binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", 1202 updatedShortcutProbability, false /* isBeginningOfSentence */, 1203 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); 1204 wordProperty = binaryDictionary.getWordProperty("aaa", 1205 false /* isBeginningOfSentence */); 1206 assertEquals(1, wordProperty.mShortcutTargets.size()); 1207 assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); 1208 assertEquals(updatedShortcutProbability, 1209 wordProperty.mShortcutTargets.get(0).getProbability()); 1210 binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", 1211 shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, 1212 false /* isBlacklisted */, 0 /* timestamp */); 1213 final HashMap<String, Integer> shortcutTargets = new HashMap<>(); 1214 shortcutTargets.put("zzz", updatedShortcutProbability); 1215 shortcutTargets.put("yyy", shortcutProbability); 1216 wordProperty = binaryDictionary.getWordProperty("aaa", 1217 false /* isBeginningOfSentence */); 1218 assertEquals(2, wordProperty.mShortcutTargets.size()); 1219 for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 1220 assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); 1221 assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), 1222 shortcutTarget.getProbability()); 1223 shortcutTargets.remove(shortcutTarget.mWord); 1224 } 1225 shortcutTargets.put("zzz", updatedShortcutProbability); 1226 shortcutTargets.put("yyy", shortcutProbability); 1227 binaryDictionary.flushWithGC(); 1228 wordProperty = binaryDictionary.getWordProperty("aaa", 1229 false /* isBeginningOfSentence */); 1230 assertEquals(2, wordProperty.mShortcutTargets.size()); 1231 for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 1232 assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); 1233 assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), 1234 shortcutTarget.getProbability()); 1235 shortcutTargets.remove(shortcutTarget.mWord); 1236 } 1237 } 1238 1239 public void testAddManyShortcuts() { 1240 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1241 testAddManyShortcuts(formatVersion); 1242 } 1243 } 1244 1245 private void testAddManyShortcuts(final int formatVersion) { 1246 final long seed = System.currentTimeMillis(); 1247 final Random random = new Random(seed); 1248 final int UNIGRAM_COUNT = 1000; 1249 final int SHORTCUT_COUNT = 10000; 1250 final int codePointSetSize = 20; 1251 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 1252 1253 final ArrayList<String> words = new ArrayList<>(); 1254 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 1255 final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>(); 1256 1257 File dictFile = null; 1258 try { 1259 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1260 } catch (IOException e) { 1261 fail("IOException while writing an initial dictionary : " + e); 1262 } 1263 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1264 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1265 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1266 1267 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1268 final String word = CodePointUtils.generateWord(random, codePointSet); 1269 final int unigramProbability = random.nextInt(0xFF); 1270 addUnigramWord(binaryDictionary, word, unigramProbability); 1271 words.add(word); 1272 unigramProbabilities.put(word, unigramProbability); 1273 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1274 binaryDictionary.flushWithGC(); 1275 } 1276 } 1277 for (int i = 0; i < SHORTCUT_COUNT; i++) { 1278 final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet); 1279 final int shortcutProbability = random.nextInt(0xF); 1280 final String word = words.get(random.nextInt(words.size())); 1281 final int unigramProbability = unigramProbabilities.get(word); 1282 binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, 1283 shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, 1284 false /* isBlacklisted */, 0 /* timestamp */); 1285 if (shortcutTargets.containsKey(word)) { 1286 final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); 1287 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); 1288 } else { 1289 final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>(); 1290 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); 1291 shortcutTargets.put(word, shortcutTargetsOfWord); 1292 } 1293 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1294 binaryDictionary.flushWithGC(); 1295 } 1296 } 1297 1298 for (final String word : words) { 1299 final WordProperty wordProperty = binaryDictionary.getWordProperty(word, 1300 false /* isBeginningOfSentence */); 1301 assertEquals((int)unigramProbabilities.get(word), 1302 wordProperty.mProbabilityInfo.mProbability); 1303 if (!shortcutTargets.containsKey(word)) { 1304 // The word does not have shortcut targets. 1305 continue; 1306 } 1307 assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size()); 1308 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 1309 final String targetCodePonts = shortcutTarget.mWord; 1310 assertEquals((int)shortcutTargets.get(word).get(targetCodePonts), 1311 shortcutTarget.getProbability()); 1312 } 1313 } 1314 } 1315 1316 public void testDictMigration() { 1317 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1318 testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); 1319 } 1320 } 1321 1322 private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { 1323 File dictFile = null; 1324 try { 1325 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); 1326 } catch (IOException e) { 1327 fail("IOException while writing an initial dictionary : " + e); 1328 } 1329 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1330 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1331 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1332 final int unigramProbability = 100; 1333 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 1334 addUnigramWord(binaryDictionary, "bbb", unigramProbability); 1335 final int bigramProbability = 150; 1336 addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); 1337 final int shortcutProbability = 10; 1338 binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, 1339 false /* isBeginningOfSentence */, false /* isNotAWord */, 1340 false /* isBlacklisted */, 0 /* timestamp */); 1341 binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, 1342 Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, 1343 true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); 1344 binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE, 1345 "aaa", bigramProbability, 0 /* timestamp */); 1346 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 1347 assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); 1348 assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); 1349 assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); 1350 assertTrue(binaryDictionary.migrateTo(toFormatVersion)); 1351 assertTrue(binaryDictionary.isValidDictionary()); 1352 assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); 1353 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 1354 assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); 1355 if (canCheckBigramProbability(toFormatVersion)) { 1356 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb")); 1357 assertEquals(bigramProbability, binaryDictionary.getNgramProbability( 1358 PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa")); 1359 } 1360 assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); 1361 WordProperty wordProperty = binaryDictionary.getWordProperty("ccc", 1362 false /* isBeginningOfSentence */); 1363 assertEquals(1, wordProperty.mShortcutTargets.size()); 1364 assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); 1365 wordProperty = binaryDictionary.getWordProperty("ddd", 1366 false /* isBeginningOfSentence */); 1367 assertTrue(wordProperty.mIsBlacklistEntry); 1368 assertTrue(wordProperty.mIsNotAWord); 1369 } 1370 1371 public void testLargeDictMigration() { 1372 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1373 testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); 1374 } 1375 } 1376 1377 private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) { 1378 final int UNIGRAM_COUNT = 3000; 1379 final int BIGRAM_COUNT = 3000; 1380 final int codePointSetSize = 50; 1381 final long seed = System.currentTimeMillis(); 1382 final Random random = new Random(seed); 1383 1384 File dictFile = null; 1385 try { 1386 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); 1387 } catch (IOException e) { 1388 fail("IOException while writing an initial dictionary : " + e); 1389 } 1390 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1391 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1392 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1393 1394 final ArrayList<String> words = new ArrayList<>(); 1395 final ArrayList<Pair<String, String>> bigrams = new ArrayList<>(); 1396 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 1397 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 1398 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 1399 1400 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1401 final String word = CodePointUtils.generateWord(random, codePointSet); 1402 final int unigramProbability = random.nextInt(0xFF); 1403 addUnigramWord(binaryDictionary, word, unigramProbability); 1404 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1405 binaryDictionary.flushWithGC(); 1406 } 1407 words.add(word); 1408 unigramProbabilities.put(word, unigramProbability); 1409 } 1410 1411 for (int i = 0; i < BIGRAM_COUNT; i++) { 1412 final int word0Index = random.nextInt(words.size()); 1413 final int word1Index = random.nextInt(words.size()); 1414 if (word0Index == word1Index) { 1415 continue; 1416 } 1417 final String word0 = words.get(word0Index); 1418 final String word1 = words.get(word1Index); 1419 final int unigramProbability = unigramProbabilities.get(word1); 1420 final int bigramProbability = 1421 random.nextInt(0xFF - unigramProbability) + unigramProbability; 1422 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 1423 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1424 binaryDictionary.flushWithGC(); 1425 } 1426 final Pair<String, String> bigram = new Pair<>(word0, word1); 1427 bigrams.add(bigram); 1428 bigramProbabilities.put(bigram, bigramProbability); 1429 } 1430 assertTrue(binaryDictionary.migrateTo(toFormatVersion)); 1431 1432 for (final String word : words) { 1433 assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word)); 1434 } 1435 assertEquals(unigramProbabilities.size(), Integer.parseInt( 1436 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 1437 1438 for (final Pair<String, String> bigram : bigrams) { 1439 if (canCheckBigramProbability(toFormatVersion)) { 1440 assertEquals((int)bigramProbabilities.get(bigram), 1441 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 1442 } 1443 assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second)); 1444 } 1445 assertEquals(bigramProbabilities.size(), Integer.parseInt( 1446 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); 1447 } 1448 1449 public void testBeginningOfSentence() { 1450 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1451 if (supportsBeginningOfSentence(formatVersion)) { 1452 testBeginningOfSentence(formatVersion); 1453 } 1454 } 1455 } 1456 1457 private void testBeginningOfSentence(final int formatVersion) { 1458 File dictFile = null; 1459 try { 1460 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1461 } catch (IOException e) { 1462 fail("IOException while writing an initial dictionary : " + e); 1463 } 1464 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1465 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1466 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1467 final int dummyProbability = 0; 1468 final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; 1469 final int bigramProbability = 200; 1470 addUnigramWord(binaryDictionary, "aaa", dummyProbability); 1471 binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, 1472 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1473 assertEquals(bigramProbability, 1474 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); 1475 binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability, 1476 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1477 addUnigramWord(binaryDictionary, "bbb", dummyProbability); 1478 binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability, 1479 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1480 binaryDictionary.flushWithGC(); 1481 assertEquals(bigramProbability, 1482 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa")); 1483 assertEquals(bigramProbability, 1484 binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb")); 1485 } 1486 1487 public void testGetMaxFrequencyOfExactMatches() { 1488 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1489 testGetMaxFrequencyOfExactMatches(formatVersion); 1490 } 1491 } 1492 1493 private void testGetMaxFrequencyOfExactMatches(final int formatVersion) { 1494 File dictFile = null; 1495 try { 1496 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1497 } catch (IOException e) { 1498 fail("IOException while writing an initial dictionary : " + e); 1499 } 1500 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1501 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1502 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1503 addUnigramWord(binaryDictionary, "abc", 10); 1504 addUnigramWord(binaryDictionary, "aBc", 15); 1505 assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1506 addUnigramWord(binaryDictionary, "ab'c", 20); 1507 assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1508 addUnigramWord(binaryDictionary, "a-b-c", 25); 1509 assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1510 addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30); 1511 assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1512 addUnigramWord(binaryDictionary, "ab c", 255); 1513 assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1514 } 1515} 1516