BinaryDictionaryTests.java revision d9b8602f4862c2c876e1499aad7ca7d77ea66595
1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.test.AndroidTestCase; 20import android.test.suitebuilder.annotation.LargeTest; 21import android.text.TextUtils; 22import android.util.Pair; 23 24import com.android.inputmethod.latin.makedict.CodePointUtils; 25import com.android.inputmethod.latin.makedict.FormatSpec; 26import com.android.inputmethod.latin.makedict.WeightedString; 27import com.android.inputmethod.latin.makedict.WordProperty; 28import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; 29import com.android.inputmethod.latin.utils.FileUtils; 30import com.android.inputmethod.latin.utils.LanguageModelParam; 31 32import java.io.File; 33import java.io.IOException; 34import java.util.ArrayList; 35import java.util.HashMap; 36import java.util.HashSet; 37import java.util.Locale; 38import java.util.Map; 39import java.util.Random; 40 41// TODO Use the seed passed as an argument for makedict test. 42@LargeTest 43public class BinaryDictionaryTests extends AndroidTestCase { 44 private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 45 private static final String TEST_LOCALE = "test"; 46 private static final int[] DICT_FORMAT_VERSIONS = 47 new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV }; 48 49 private static boolean canCheckBigramProbability(final int formatVersion) { 50 return formatVersion > FormatSpec.VERSION401; 51 } 52 53 private static boolean supportsBeginningOfSentence(final int formatVersion) { 54 return formatVersion > FormatSpec.VERSION401; 55 } 56 57 private File createEmptyDictionaryAndGetFile(final String dictId, 58 final int formatVersion) throws IOException { 59 if (formatVersion == FormatSpec.VERSION4 60 || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING 61 || formatVersion == FormatSpec.VERSION4_DEV) { 62 return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion); 63 } else { 64 throw new IOException("Dictionary format version " + formatVersion 65 + " is not supported."); 66 } 67 } 68 69 private File createEmptyVer4DictionaryAndGetFile(final String dictId, 70 final int formatVersion) throws IOException { 71 final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, 72 getContext().getCacheDir()); 73 file.delete(); 74 file.mkdir(); 75 Map<String, String> attributeMap = new HashMap<>(); 76 if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, 77 Locale.ENGLISH, attributeMap)) { 78 return file; 79 } else { 80 throw new IOException("Empty dictionary " + file.getAbsolutePath() 81 + " cannot be created. Format version: " + formatVersion); 82 } 83 } 84 85 public void testIsValidDictionary() { 86 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 87 testIsValidDictionary(formatVersion); 88 } 89 } 90 91 private void testIsValidDictionary(final int formatVersion) { 92 File dictFile = null; 93 try { 94 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 95 } catch (IOException e) { 96 fail("IOException while writing an initial dictionary : " + e); 97 } 98 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 99 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 100 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 101 assertTrue("binaryDictionary must be valid for existing valid dictionary file.", 102 binaryDictionary.isValidDictionary()); 103 binaryDictionary.close(); 104 assertFalse("binaryDictionary must be invalid after closing.", 105 binaryDictionary.isValidDictionary()); 106 FileUtils.deleteRecursively(dictFile); 107 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, 108 dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), 109 TEST_LOCALE, true /* isUpdatable */); 110 assertFalse("binaryDictionary must be invalid for not existing dictionary file.", 111 binaryDictionary.isValidDictionary()); 112 binaryDictionary.close(); 113 } 114 115 public void testConstructingDictionaryOnMemory() { 116 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 117 testConstructingDictionaryOnMemory(formatVersion); 118 } 119 } 120 121 private void testConstructingDictionaryOnMemory(final int formatVersion) { 122 File dictFile = null; 123 try { 124 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 125 } catch (IOException e) { 126 fail("IOException while writing an initial dictionary : " + e); 127 } 128 FileUtils.deleteRecursively(dictFile); 129 assertFalse(dictFile.exists()); 130 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 131 true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion, 132 new HashMap<String, String>()); 133 assertTrue(binaryDictionary.isValidDictionary()); 134 assertEquals(formatVersion, binaryDictionary.getFormatVersion()); 135 final int probability = 100; 136 addUnigramWord(binaryDictionary, "word", probability); 137 assertEquals(probability, binaryDictionary.getFrequency("word")); 138 assertFalse(dictFile.exists()); 139 binaryDictionary.flush(); 140 assertTrue(dictFile.exists()); 141 assertTrue(binaryDictionary.isValidDictionary()); 142 assertEquals(formatVersion, binaryDictionary.getFormatVersion()); 143 assertEquals(probability, binaryDictionary.getFrequency("word")); 144 binaryDictionary.close(); 145 dictFile.delete(); 146 } 147 148 public void testAddTooLongWord() { 149 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 150 testAddTooLongWord(formatVersion); 151 } 152 } 153 154 private void testAddTooLongWord(final int formatVersion) { 155 File dictFile = null; 156 try { 157 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 158 } catch (IOException e) { 159 fail("IOException while writing an initial dictionary : " + e); 160 } 161 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 162 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 163 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 164 165 final StringBuffer stringBuilder = new StringBuffer(); 166 for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) { 167 stringBuilder.append('a'); 168 } 169 final String validLongWord = stringBuilder.toString(); 170 stringBuilder.append('a'); 171 final String invalidLongWord = stringBuilder.toString(); 172 final int probability = 100; 173 addUnigramWord(binaryDictionary, "aaa", probability); 174 addUnigramWord(binaryDictionary, validLongWord, probability); 175 addUnigramWord(binaryDictionary, invalidLongWord, probability); 176 // Too long short cut. 177 binaryDictionary.addUnigramEntry("a", probability, invalidLongWord, 178 10 /* shortcutProbability */, false /* isBeginningOfSentence */, 179 false /* isNotAWord */, false /* isBlacklisted */, 180 BinaryDictionary.NOT_A_VALID_TIMESTAMP); 181 addUnigramWord(binaryDictionary, "abc", probability); 182 final int updatedProbability = 200; 183 // Update. 184 addUnigramWord(binaryDictionary, validLongWord, updatedProbability); 185 addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability); 186 addUnigramWord(binaryDictionary, "abc", updatedProbability); 187 188 assertEquals(probability, binaryDictionary.getFrequency("aaa")); 189 assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord)); 190 assertEquals(BinaryDictionary.NOT_A_PROBABILITY, 191 binaryDictionary.getFrequency(invalidLongWord)); 192 assertEquals(updatedProbability, binaryDictionary.getFrequency("abc")); 193 dictFile.delete(); 194 } 195 196 private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word, 197 final int probability) { 198 binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */, 199 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, 200 false /* isBeginningOfSentence */, false /* isNotAWord */, 201 false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 202 } 203 204 private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0, 205 final String word1, final int probability) { 206 binaryDictionary.addNgramEntry(new PrevWordsInfo(word0), word1, probability, 207 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 208 } 209 210 private static boolean isValidBigram(final BinaryDictionary binaryDictionary, 211 final String word0, final String word1) { 212 return binaryDictionary.isValidNgram(new PrevWordsInfo(word0), word1); 213 } 214 215 private static void removeBigramEntry(final BinaryDictionary binaryDictionary, 216 final String word0, final String word1) { 217 binaryDictionary.removeNgramEntry(new PrevWordsInfo(word0), word1); 218 } 219 220 private static int getBigramProbability(final BinaryDictionary binaryDictionary, 221 final String word0, final String word1) { 222 return binaryDictionary.getNgramProbability(new PrevWordsInfo(word0), word1); 223 } 224 225 public void testAddUnigramWord() { 226 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 227 testAddUnigramWord(formatVersion); 228 } 229 } 230 231 private void testAddUnigramWord(final int formatVersion) { 232 File dictFile = null; 233 try { 234 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 235 } catch (IOException e) { 236 fail("IOException while writing an initial dictionary : " + e); 237 } 238 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 239 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 240 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 241 242 final int probability = 100; 243 addUnigramWord(binaryDictionary, "aaa", probability); 244 // Reallocate and create. 245 addUnigramWord(binaryDictionary, "aab", probability); 246 // Insert into children. 247 addUnigramWord(binaryDictionary, "aac", probability); 248 // Make terminal. 249 addUnigramWord(binaryDictionary, "aa", probability); 250 // Create children. 251 addUnigramWord(binaryDictionary, "aaaa", probability); 252 // Reallocate and make termianl. 253 addUnigramWord(binaryDictionary, "a", probability); 254 255 final int updatedProbability = 200; 256 // Update. 257 addUnigramWord(binaryDictionary, "aaa", updatedProbability); 258 259 assertEquals(probability, binaryDictionary.getFrequency("aab")); 260 assertEquals(probability, binaryDictionary.getFrequency("aac")); 261 assertEquals(probability, binaryDictionary.getFrequency("aa")); 262 assertEquals(probability, binaryDictionary.getFrequency("aaaa")); 263 assertEquals(probability, binaryDictionary.getFrequency("a")); 264 assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa")); 265 266 dictFile.delete(); 267 } 268 269 public void testRandomlyAddUnigramWord() { 270 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 271 testRandomlyAddUnigramWord(formatVersion); 272 } 273 } 274 275 private void testRandomlyAddUnigramWord(final int formatVersion) { 276 final int wordCount = 1000; 277 final int codePointSetSize = 50; 278 final long seed = System.currentTimeMillis(); 279 280 File dictFile = null; 281 try { 282 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 283 } catch (IOException e) { 284 fail("IOException while writing an initial dictionary : " + e); 285 } 286 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 287 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 288 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 289 290 final HashMap<String, Integer> probabilityMap = new HashMap<>(); 291 // Test a word that isn't contained within the dictionary. 292 final Random random = new Random(seed); 293 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 294 for (int i = 0; i < wordCount; ++i) { 295 final String word = CodePointUtils.generateWord(random, codePointSet); 296 probabilityMap.put(word, random.nextInt(0xFF)); 297 } 298 for (String word : probabilityMap.keySet()) { 299 addUnigramWord(binaryDictionary, word, probabilityMap.get(word)); 300 } 301 for (String word : probabilityMap.keySet()) { 302 assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); 303 } 304 dictFile.delete(); 305 } 306 307 public void testAddBigramWords() { 308 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 309 testAddBigramWords(formatVersion); 310 } 311 } 312 313 private void testAddBigramWords(final int formatVersion) { 314 File dictFile = null; 315 try { 316 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 317 } catch (IOException e) { 318 fail("IOException while writing an initial dictionary : " + e); 319 } 320 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 321 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 322 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 323 324 final int unigramProbability = 100; 325 final int bigramProbability = 150; 326 final int updatedBigramProbability = 200; 327 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 328 addUnigramWord(binaryDictionary, "abb", unigramProbability); 329 addUnigramWord(binaryDictionary, "bcc", unigramProbability); 330 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 331 addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 332 addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 333 addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 334 335 assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 336 assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); 337 assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); 338 assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); 339 if (canCheckBigramProbability(formatVersion)) { 340 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); 341 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); 342 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); 343 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); 344 } 345 346 addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability); 347 if (canCheckBigramProbability(formatVersion)) { 348 assertEquals(updatedBigramProbability, 349 getBigramProbability(binaryDictionary, "aaa", "abb")); 350 } 351 352 assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); 353 assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); 354 assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); 355 assertEquals(Dictionary.NOT_A_PROBABILITY, 356 getBigramProbability(binaryDictionary, "bcc", "aaa")); 357 assertEquals(Dictionary.NOT_A_PROBABILITY, 358 getBigramProbability(binaryDictionary, "bcc", "bbc")); 359 assertEquals(Dictionary.NOT_A_PROBABILITY, 360 getBigramProbability(binaryDictionary, "aaa", "aaa")); 361 362 // Testing bigram link. 363 addUnigramWord(binaryDictionary, "abcde", unigramProbability); 364 addUnigramWord(binaryDictionary, "fghij", unigramProbability); 365 addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability); 366 addUnigramWord(binaryDictionary, "fgh", unigramProbability); 367 addUnigramWord(binaryDictionary, "abc", unigramProbability); 368 addUnigramWord(binaryDictionary, "f", unigramProbability); 369 370 if (canCheckBigramProbability(formatVersion)) { 371 assertEquals(bigramProbability, 372 getBigramProbability(binaryDictionary, "abcde", "fghij")); 373 } 374 assertEquals(Dictionary.NOT_A_PROBABILITY, 375 getBigramProbability(binaryDictionary, "abcde", "fgh")); 376 addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability); 377 if (canCheckBigramProbability(formatVersion)) { 378 assertEquals(updatedBigramProbability, 379 getBigramProbability(binaryDictionary, "abcde", "fghij")); 380 } 381 382 dictFile.delete(); 383 } 384 385 public void testRandomlyAddBigramWords() { 386 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 387 testRandomlyAddBigramWords(formatVersion); 388 } 389 } 390 391 private void testRandomlyAddBigramWords(final int formatVersion) { 392 final int wordCount = 100; 393 final int bigramCount = 1000; 394 final int codePointSetSize = 50; 395 final long seed = System.currentTimeMillis(); 396 final Random random = new Random(seed); 397 398 File dictFile = null; 399 try { 400 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 401 } catch (IOException e) { 402 fail("IOException while writing an initial dictionary : " + e); 403 } 404 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 405 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 406 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 407 408 final ArrayList<String> words = new ArrayList<>(); 409 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 410 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 411 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 412 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 413 414 for (int i = 0; i < wordCount; ++i) { 415 final String word = CodePointUtils.generateWord(random, codePointSet); 416 words.add(word); 417 final int unigramProbability = random.nextInt(0xFF); 418 unigramProbabilities.put(word, unigramProbability); 419 addUnigramWord(binaryDictionary, word, unigramProbability); 420 } 421 422 for (int i = 0; i < bigramCount; i++) { 423 final String word0 = words.get(random.nextInt(wordCount)); 424 final String word1 = words.get(random.nextInt(wordCount)); 425 if (TextUtils.equals(word0, word1)) { 426 continue; 427 } 428 final Pair<String, String> bigram = new Pair<>(word0, word1); 429 bigramWords.add(bigram); 430 final int unigramProbability = unigramProbabilities.get(word1); 431 final int bigramProbability = 432 unigramProbability + random.nextInt(0xFF - unigramProbability); 433 bigramProbabilities.put(bigram, bigramProbability); 434 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 435 } 436 437 for (final Pair<String, String> bigram : bigramWords) { 438 final int bigramProbability = bigramProbabilities.get(bigram); 439 assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 440 isValidBigram(binaryDictionary, bigram.first, bigram.second)); 441 if (canCheckBigramProbability(formatVersion)) { 442 assertEquals(bigramProbability, 443 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 444 } 445 } 446 447 dictFile.delete(); 448 } 449 450 public void testRemoveBigramWords() { 451 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 452 testRemoveBigramWords(formatVersion); 453 } 454 } 455 456 private void testRemoveBigramWords(final int formatVersion) { 457 File dictFile = null; 458 try { 459 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 460 } catch (IOException e) { 461 fail("IOException while writing an initial dictionary : " + e); 462 } 463 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 464 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 465 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 466 final int unigramProbability = 100; 467 final int bigramProbability = 150; 468 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 469 addUnigramWord(binaryDictionary, "abb", unigramProbability); 470 addUnigramWord(binaryDictionary, "bcc", unigramProbability); 471 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 472 addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 473 addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 474 addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 475 476 assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 477 assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc")); 478 assertTrue(isValidBigram(binaryDictionary, "abb", "aaa")); 479 assertTrue(isValidBigram(binaryDictionary, "abb", "bcc")); 480 481 removeBigramEntry(binaryDictionary, "aaa", "abb"); 482 assertFalse(isValidBigram(binaryDictionary, "aaa", "abb")); 483 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 484 assertTrue(isValidBigram(binaryDictionary, "aaa", "abb")); 485 486 487 removeBigramEntry(binaryDictionary, "aaa", "bcc"); 488 assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc")); 489 removeBigramEntry(binaryDictionary, "abb", "aaa"); 490 assertFalse(isValidBigram(binaryDictionary, "abb", "aaa")); 491 removeBigramEntry(binaryDictionary, "abb", "bcc"); 492 assertFalse(isValidBigram(binaryDictionary, "abb", "bcc")); 493 494 removeBigramEntry(binaryDictionary, "aaa", "abb"); 495 // Test remove non-existing bigram operation. 496 removeBigramEntry(binaryDictionary, "aaa", "abb"); 497 removeBigramEntry(binaryDictionary, "bcc", "aaa"); 498 499 dictFile.delete(); 500 } 501 502 public void testFlushDictionary() { 503 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 504 testFlushDictionary(formatVersion); 505 } 506 } 507 508 private void testFlushDictionary(final int formatVersion) { 509 File dictFile = null; 510 try { 511 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 512 } catch (IOException e) { 513 fail("IOException while writing an initial dictionary : " + e); 514 } 515 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 516 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 517 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 518 519 final int probability = 100; 520 addUnigramWord(binaryDictionary, "aaa", probability); 521 addUnigramWord(binaryDictionary, "abcd", probability); 522 // Close without flushing. 523 binaryDictionary.close(); 524 525 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 526 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 527 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 528 529 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); 530 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); 531 532 addUnigramWord(binaryDictionary, "aaa", probability); 533 addUnigramWord(binaryDictionary, "abcd", probability); 534 binaryDictionary.flush(); 535 binaryDictionary.close(); 536 537 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 538 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 539 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 540 541 assertEquals(probability, binaryDictionary.getFrequency("aaa")); 542 assertEquals(probability, binaryDictionary.getFrequency("abcd")); 543 addUnigramWord(binaryDictionary, "bcde", probability); 544 binaryDictionary.flush(); 545 binaryDictionary.close(); 546 547 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 548 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 549 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 550 assertEquals(probability, binaryDictionary.getFrequency("bcde")); 551 binaryDictionary.close(); 552 553 dictFile.delete(); 554 } 555 556 public void testFlushWithGCDictionary() { 557 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 558 testFlushWithGCDictionary(formatVersion); 559 } 560 } 561 562 private void testFlushWithGCDictionary(final int formatVersion) { 563 File dictFile = null; 564 try { 565 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 566 } catch (IOException e) { 567 fail("IOException while writing an initial dictionary : " + e); 568 } 569 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 570 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 571 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 572 573 final int unigramProbability = 100; 574 final int bigramProbability = 150; 575 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 576 addUnigramWord(binaryDictionary, "abb", unigramProbability); 577 addUnigramWord(binaryDictionary, "bcc", unigramProbability); 578 addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability); 579 addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability); 580 addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability); 581 addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability); 582 binaryDictionary.flushWithGC(); 583 binaryDictionary.close(); 584 585 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 586 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 587 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 588 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 589 assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); 590 assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); 591 if (canCheckBigramProbability(formatVersion)) { 592 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb")); 593 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc")); 594 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa")); 595 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc")); 596 } 597 assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa")); 598 assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc")); 599 assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa")); 600 binaryDictionary.flushWithGC(); 601 binaryDictionary.close(); 602 603 dictFile.delete(); 604 } 605 606 public void testAddBigramWordsAndFlashWithGC() { 607 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 608 testAddBigramWordsAndFlashWithGC(formatVersion); 609 } 610 } 611 612 // TODO: Evaluate performance of GC 613 private void testAddBigramWordsAndFlashWithGC(final int formatVersion) { 614 final int wordCount = 100; 615 final int bigramCount = 1000; 616 final int codePointSetSize = 30; 617 final long seed = System.currentTimeMillis(); 618 final Random random = new Random(seed); 619 620 File dictFile = null; 621 try { 622 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 623 } catch (IOException e) { 624 fail("IOException while writing an initial dictionary : " + e); 625 } 626 627 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 628 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 629 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 630 631 final ArrayList<String> words = new ArrayList<>(); 632 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 633 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 634 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 635 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 636 637 for (int i = 0; i < wordCount; ++i) { 638 final String word = CodePointUtils.generateWord(random, codePointSet); 639 words.add(word); 640 final int unigramProbability = random.nextInt(0xFF); 641 unigramProbabilities.put(word, unigramProbability); 642 addUnigramWord(binaryDictionary, word, unigramProbability); 643 } 644 645 for (int i = 0; i < bigramCount; i++) { 646 final String word0 = words.get(random.nextInt(wordCount)); 647 final String word1 = words.get(random.nextInt(wordCount)); 648 if (TextUtils.equals(word0, word1)) { 649 continue; 650 } 651 final Pair<String, String> bigram = new Pair<>(word0, word1); 652 bigramWords.add(bigram); 653 final int unigramProbability = unigramProbabilities.get(word1); 654 final int bigramProbability = 655 unigramProbability + random.nextInt(0xFF - unigramProbability); 656 bigramProbabilities.put(bigram, bigramProbability); 657 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 658 } 659 660 binaryDictionary.flushWithGC(); 661 binaryDictionary.close(); 662 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 663 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 664 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 665 666 667 for (final Pair<String, String> bigram : bigramWords) { 668 final int bigramProbability = bigramProbabilities.get(bigram); 669 assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 670 isValidBigram(binaryDictionary, bigram.first, bigram.second)); 671 if (canCheckBigramProbability(formatVersion)) { 672 assertEquals(bigramProbability, 673 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 674 } 675 } 676 677 dictFile.delete(); 678 } 679 680 public void testRandomOperationsAndFlashWithGC() { 681 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 682 testRandomOperationsAndFlashWithGC(formatVersion); 683 } 684 } 685 686 private void testRandomOperationsAndFlashWithGC(final int formatVersion) { 687 final int flashWithGCIterationCount = 50; 688 final int operationCountInEachIteration = 200; 689 final int initialUnigramCount = 100; 690 final float addUnigramProb = 0.5f; 691 final float addBigramProb = 0.8f; 692 final float removeBigramProb = 0.2f; 693 final int codePointSetSize = 30; 694 695 final long seed = System.currentTimeMillis(); 696 final Random random = new Random(seed); 697 698 File dictFile = null; 699 try { 700 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 701 } catch (IOException e) { 702 fail("IOException while writing an initial dictionary : " + e); 703 } 704 705 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 706 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 707 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 708 final ArrayList<String> words = new ArrayList<>(); 709 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>(); 710 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 711 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 712 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 713 for (int i = 0; i < initialUnigramCount; ++i) { 714 final String word = CodePointUtils.generateWord(random, codePointSet); 715 words.add(word); 716 final int unigramProbability = random.nextInt(0xFF); 717 unigramProbabilities.put(word, unigramProbability); 718 addUnigramWord(binaryDictionary, word, unigramProbability); 719 } 720 binaryDictionary.flushWithGC(); 721 binaryDictionary.close(); 722 723 for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { 724 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 725 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 726 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 727 for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { 728 // Add unigram. 729 if (random.nextFloat() < addUnigramProb) { 730 final String word = CodePointUtils.generateWord(random, codePointSet); 731 words.add(word); 732 final int unigramProbability = random.nextInt(0xFF); 733 unigramProbabilities.put(word, unigramProbability); 734 addUnigramWord(binaryDictionary, word, unigramProbability); 735 } 736 // Add bigram. 737 if (random.nextFloat() < addBigramProb && words.size() > 2) { 738 final int word0Index = random.nextInt(words.size()); 739 int word1Index = random.nextInt(words.size() - 1); 740 if (word0Index <= word1Index) { 741 word1Index++; 742 } 743 final String word0 = words.get(word0Index); 744 final String word1 = words.get(word1Index); 745 if (TextUtils.equals(word0, word1)) { 746 continue; 747 } 748 final int unigramProbability = unigramProbabilities.get(word1); 749 final int bigramProbability = 750 unigramProbability + random.nextInt(0xFF - unigramProbability); 751 final Pair<String, String> bigram = new Pair<>(word0, word1); 752 bigramWords.add(bigram); 753 bigramProbabilities.put(bigram, bigramProbability); 754 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 755 } 756 // Remove bigram. 757 if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { 758 final int bigramIndex = random.nextInt(bigramWords.size()); 759 final Pair<String, String> bigram = bigramWords.get(bigramIndex); 760 bigramWords.remove(bigramIndex); 761 bigramProbabilities.remove(bigram); 762 removeBigramEntry(binaryDictionary, bigram.first, bigram.second); 763 } 764 } 765 766 // Test whether the all unigram operations are collectlly handled. 767 for (int i = 0; i < words.size(); i++) { 768 final String word = words.get(i); 769 final int unigramProbability = unigramProbabilities.get(word); 770 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 771 } 772 // Test whether the all bigram operations are collectlly handled. 773 for (int i = 0; i < bigramWords.size(); i++) { 774 final Pair<String, String> bigram = bigramWords.get(i); 775 final int probability; 776 if (bigramProbabilities.containsKey(bigram)) { 777 final int bigramProbability = bigramProbabilities.get(bigram); 778 probability = bigramProbability; 779 } else { 780 probability = Dictionary.NOT_A_PROBABILITY; 781 } 782 783 if (canCheckBigramProbability(formatVersion)) { 784 assertEquals(probability, 785 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 786 } 787 assertEquals(probability != Dictionary.NOT_A_PROBABILITY, 788 isValidBigram(binaryDictionary, bigram.first, bigram.second)); 789 } 790 binaryDictionary.flushWithGC(); 791 binaryDictionary.close(); 792 } 793 794 dictFile.delete(); 795 } 796 797 public void testAddManyUnigramsAndFlushWithGC() { 798 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 799 testAddManyUnigramsAndFlushWithGC(formatVersion); 800 } 801 } 802 803 private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) { 804 final int flashWithGCIterationCount = 3; 805 final int codePointSetSize = 50; 806 807 final long seed = System.currentTimeMillis(); 808 final Random random = new Random(seed); 809 810 File dictFile = null; 811 try { 812 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 813 } catch (IOException e) { 814 fail("IOException while writing an initial dictionary : " + e); 815 } 816 817 final ArrayList<String> words = new ArrayList<>(); 818 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 819 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 820 821 BinaryDictionary binaryDictionary; 822 for (int i = 0; i < flashWithGCIterationCount; i++) { 823 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 824 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 825 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 826 while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 827 final String word = CodePointUtils.generateWord(random, codePointSet); 828 words.add(word); 829 final int unigramProbability = random.nextInt(0xFF); 830 unigramProbabilities.put(word, unigramProbability); 831 addUnigramWord(binaryDictionary, word, unigramProbability); 832 } 833 834 for (int j = 0; j < words.size(); j++) { 835 final String word = words.get(j); 836 final int unigramProbability = unigramProbabilities.get(word); 837 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 838 } 839 840 binaryDictionary.flushWithGC(); 841 binaryDictionary.close(); 842 } 843 844 dictFile.delete(); 845 } 846 847 public void testUnigramAndBigramCount() { 848 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 849 testUnigramAndBigramCount(formatVersion); 850 } 851 } 852 853 private void testUnigramAndBigramCount(final int formatVersion) { 854 final int flashWithGCIterationCount = 10; 855 final int codePointSetSize = 50; 856 final int unigramCountPerIteration = 1000; 857 final int bigramCountPerIteration = 2000; 858 final long seed = System.currentTimeMillis(); 859 final Random random = new Random(seed); 860 861 File dictFile = null; 862 try { 863 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 864 } catch (IOException e) { 865 fail("IOException while writing an initial dictionary : " + e); 866 } 867 868 final ArrayList<String> words = new ArrayList<>(); 869 final HashSet<Pair<String, String>> bigrams = new HashSet<>(); 870 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 871 872 BinaryDictionary binaryDictionary; 873 for (int i = 0; i < flashWithGCIterationCount; i++) { 874 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 875 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 876 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 877 for (int j = 0; j < unigramCountPerIteration; j++) { 878 final String word = CodePointUtils.generateWord(random, codePointSet); 879 words.add(word); 880 final int unigramProbability = random.nextInt(0xFF); 881 addUnigramWord(binaryDictionary, word, unigramProbability); 882 } 883 for (int j = 0; j < bigramCountPerIteration; j++) { 884 final String word0 = words.get(random.nextInt(words.size())); 885 final String word1 = words.get(random.nextInt(words.size())); 886 if (TextUtils.equals(word0, word1)) { 887 continue; 888 } 889 bigrams.add(new Pair<>(word0, word1)); 890 final int bigramProbability = random.nextInt(0xF); 891 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 892 } 893 assertEquals(new HashSet<>(words).size(), Integer.parseInt( 894 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 895 assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( 896 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); 897 binaryDictionary.flushWithGC(); 898 assertEquals(new HashSet<>(words).size(), Integer.parseInt( 899 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 900 assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt( 901 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); 902 binaryDictionary.close(); 903 } 904 905 dictFile.delete(); 906 } 907 908 public void testAddMultipleDictionaryEntries() { 909 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 910 testAddMultipleDictionaryEntries(formatVersion); 911 } 912 } 913 914 private void testAddMultipleDictionaryEntries(final int formatVersion) { 915 final int codePointSetSize = 20; 916 final int lmParamCount = 1000; 917 final double bigramContinueRate = 0.9; 918 final long seed = System.currentTimeMillis(); 919 final Random random = new Random(seed); 920 921 File dictFile = null; 922 try { 923 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 924 } catch (IOException e) { 925 fail("IOException while writing an initial dictionary : " + e); 926 } 927 928 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 929 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 930 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 931 932 final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount]; 933 String prevWord = null; 934 for (int i = 0; i < languageModelParams.length; i++) { 935 final String word = CodePointUtils.generateWord(random, codePointSet); 936 final int probability = random.nextInt(0xFF); 937 final int bigramProbability = probability + random.nextInt(0xFF - probability); 938 unigramProbabilities.put(word, probability); 939 if (prevWord == null) { 940 languageModelParams[i] = new LanguageModelParam(word, probability, 941 BinaryDictionary.NOT_A_VALID_TIMESTAMP); 942 } else { 943 languageModelParams[i] = new LanguageModelParam(prevWord, word, probability, 944 bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP); 945 bigramProbabilities.put(new Pair<>(prevWord, word), 946 bigramProbability); 947 } 948 prevWord = (random.nextDouble() < bigramContinueRate) ? word : null; 949 } 950 951 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 952 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 953 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 954 binaryDictionary.addMultipleDictionaryEntries(languageModelParams); 955 956 for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) { 957 assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey())); 958 } 959 960 for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) { 961 final String word0 = entry.getKey().first; 962 final String word1 = entry.getKey().second; 963 final int bigramProbability = entry.getValue(); 964 assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY, 965 isValidBigram(binaryDictionary, word0, word1)); 966 if (canCheckBigramProbability(formatVersion)) { 967 assertEquals(bigramProbability, 968 getBigramProbability(binaryDictionary, word0, word1)); 969 } 970 } 971 } 972 973 public void testGetWordProperties() { 974 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 975 testGetWordProperties(formatVersion); 976 } 977 } 978 979 private void testGetWordProperties(final int formatVersion) { 980 final long seed = System.currentTimeMillis(); 981 final Random random = new Random(seed); 982 final int UNIGRAM_COUNT = 1000; 983 final int BIGRAM_COUNT = 1000; 984 final int codePointSetSize = 20; 985 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 986 987 File dictFile = null; 988 try { 989 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 990 } catch (IOException e) { 991 fail("IOException while writing an initial dictionary : " + e); 992 } 993 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 994 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 995 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 996 997 final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); 998 assertFalse(invalidWordProperty.isValid()); 999 1000 final ArrayList<String> words = new ArrayList<>(); 1001 final HashMap<String, Integer> wordProbabilities = new HashMap<>(); 1002 final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); 1003 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 1004 1005 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1006 final String word = CodePointUtils.generateWord(random, codePointSet); 1007 final int unigramProbability = random.nextInt(0xFF); 1008 final boolean isNotAWord = random.nextBoolean(); 1009 final boolean isBlacklisted = random.nextBoolean(); 1010 // TODO: Add tests for historical info. 1011 binaryDictionary.addUnigramEntry(word, unigramProbability, 1012 null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY, 1013 false /* isBeginningOfSentence */, isNotAWord, isBlacklisted, 1014 BinaryDictionary.NOT_A_VALID_TIMESTAMP); 1015 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1016 binaryDictionary.flushWithGC(); 1017 } 1018 words.add(word); 1019 wordProbabilities.put(word, unigramProbability); 1020 final WordProperty wordProperty = binaryDictionary.getWordProperty(word); 1021 assertEquals(word, wordProperty.mWord); 1022 assertTrue(wordProperty.isValid()); 1023 assertEquals(isNotAWord, wordProperty.mIsNotAWord); 1024 assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); 1025 assertEquals(false, wordProperty.mHasBigrams); 1026 assertEquals(false, wordProperty.mHasShortcuts); 1027 assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); 1028 assertTrue(wordProperty.mShortcutTargets.isEmpty()); 1029 } 1030 1031 for (int i = 0; i < BIGRAM_COUNT; i++) { 1032 final int word0Index = random.nextInt(wordProbabilities.size()); 1033 final int word1Index = random.nextInt(wordProbabilities.size()); 1034 if (word0Index == word1Index) { 1035 continue; 1036 } 1037 final String word0 = words.get(word0Index); 1038 final String word1 = words.get(word1Index); 1039 final int unigramProbability = wordProbabilities.get(word1); 1040 final int bigramProbability = 1041 unigramProbability + random.nextInt(0xFF - unigramProbability); 1042 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 1043 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1044 binaryDictionary.flushWithGC(); 1045 } 1046 if (!bigrams.containsKey(word0)) { 1047 final HashSet<String> bigramWord1s = new HashSet<>(); 1048 bigrams.put(word0, bigramWord1s); 1049 } 1050 bigrams.get(word0).add(word1); 1051 bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability); 1052 } 1053 1054 for (int i = 0; i < words.size(); i++) { 1055 final String word0 = words.get(i); 1056 if (!bigrams.containsKey(word0)) { 1057 continue; 1058 } 1059 final HashSet<String> bigramWord1s = bigrams.get(word0); 1060 final WordProperty wordProperty = binaryDictionary.getWordProperty(word0); 1061 assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); 1062 for (int j = 0; j < wordProperty.mBigrams.size(); j++) { 1063 final String word1 = wordProperty.mBigrams.get(j).mWord; 1064 assertTrue(bigramWord1s.contains(word1)); 1065 if (canCheckBigramProbability(formatVersion)) { 1066 final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); 1067 assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); 1068 } 1069 } 1070 } 1071 } 1072 1073 public void testIterateAllWords() { 1074 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1075 testIterateAllWords(formatVersion); 1076 } 1077 } 1078 1079 private void testIterateAllWords(final int formatVersion) { 1080 final long seed = System.currentTimeMillis(); 1081 final Random random = new Random(seed); 1082 final int UNIGRAM_COUNT = 1000; 1083 final int BIGRAM_COUNT = 1000; 1084 final int codePointSetSize = 20; 1085 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 1086 1087 File dictFile = null; 1088 try { 1089 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1090 } catch (IOException e) { 1091 fail("IOException while writing an initial dictionary : " + e); 1092 } 1093 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1094 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1095 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1096 1097 final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord"); 1098 assertFalse(invalidWordProperty.isValid()); 1099 1100 final ArrayList<String> words = new ArrayList<>(); 1101 final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>(); 1102 final HashMap<String, HashSet<String>> bigrams = new HashMap<>(); 1103 final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater = 1104 new HashMap<>(); 1105 1106 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1107 final String word = CodePointUtils.generateWord(random, codePointSet); 1108 final int unigramProbability = random.nextInt(0xFF); 1109 addUnigramWord(binaryDictionary, word, unigramProbability); 1110 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1111 binaryDictionary.flushWithGC(); 1112 } 1113 words.add(word); 1114 wordProbabilitiesToCheckLater.put(word, unigramProbability); 1115 } 1116 1117 for (int i = 0; i < BIGRAM_COUNT; i++) { 1118 final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size()); 1119 final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size()); 1120 if (word0Index == word1Index) { 1121 continue; 1122 } 1123 final String word0 = words.get(word0Index); 1124 final String word1 = words.get(word1Index); 1125 final int unigramProbability = wordProbabilitiesToCheckLater.get(word1); 1126 final int bigramProbability = 1127 unigramProbability + random.nextInt(0xFF - unigramProbability); 1128 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 1129 if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) { 1130 binaryDictionary.flushWithGC(); 1131 } 1132 if (!bigrams.containsKey(word0)) { 1133 final HashSet<String> bigramWord1s = new HashSet<>(); 1134 bigrams.put(word0, bigramWord1s); 1135 } 1136 bigrams.get(word0).add(word1); 1137 bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability); 1138 } 1139 1140 final HashSet<String> wordSet = new HashSet<>(words); 1141 final HashSet<Pair<String, String>> bigramSet = 1142 new HashSet<>(bigramProbabilitiesToCheckLater.keySet()); 1143 int token = 0; 1144 do { 1145 final BinaryDictionary.GetNextWordPropertyResult result = 1146 binaryDictionary.getNextWordProperty(token); 1147 final WordProperty wordProperty = result.mWordProperty; 1148 final String word0 = wordProperty.mWord; 1149 assertEquals((int)wordProbabilitiesToCheckLater.get(word0), 1150 wordProperty.mProbabilityInfo.mProbability); 1151 wordSet.remove(word0); 1152 final HashSet<String> bigramWord1s = bigrams.get(word0); 1153 for (int j = 0; j < wordProperty.mBigrams.size(); j++) { 1154 final String word1 = wordProperty.mBigrams.get(j).mWord; 1155 assertTrue(bigramWord1s.contains(word1)); 1156 final Pair<String, String> bigram = new Pair<>(word0, word1); 1157 if (canCheckBigramProbability(formatVersion)) { 1158 final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); 1159 assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); 1160 } 1161 bigramSet.remove(bigram); 1162 } 1163 token = result.mNextToken; 1164 } while (token != 0); 1165 assertTrue(wordSet.isEmpty()); 1166 assertTrue(bigramSet.isEmpty()); 1167 } 1168 1169 public void testAddShortcuts() { 1170 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1171 testAddShortcuts(formatVersion); 1172 } 1173 } 1174 1175 private void testAddShortcuts(final int formatVersion) { 1176 File dictFile = null; 1177 try { 1178 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1179 } catch (IOException e) { 1180 fail("IOException while writing an initial dictionary : " + e); 1181 } 1182 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1183 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1184 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1185 1186 final int unigramProbability = 100; 1187 final int shortcutProbability = 10; 1188 binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", 1189 shortcutProbability, false /* isBeginningOfSentence */, 1190 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); 1191 WordProperty wordProperty = binaryDictionary.getWordProperty("aaa"); 1192 assertEquals(1, wordProperty.mShortcutTargets.size()); 1193 assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); 1194 assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); 1195 final int updatedShortcutProbability = 2; 1196 binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz", 1197 updatedShortcutProbability, false /* isBeginningOfSentence */, 1198 false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); 1199 wordProperty = binaryDictionary.getWordProperty("aaa"); 1200 assertEquals(1, wordProperty.mShortcutTargets.size()); 1201 assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); 1202 assertEquals(updatedShortcutProbability, 1203 wordProperty.mShortcutTargets.get(0).getProbability()); 1204 binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy", 1205 shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, 1206 false /* isBlacklisted */, 0 /* timestamp */); 1207 final HashMap<String, Integer> shortcutTargets = new HashMap<>(); 1208 shortcutTargets.put("zzz", updatedShortcutProbability); 1209 shortcutTargets.put("yyy", shortcutProbability); 1210 wordProperty = binaryDictionary.getWordProperty("aaa"); 1211 assertEquals(2, wordProperty.mShortcutTargets.size()); 1212 for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 1213 assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); 1214 assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), 1215 shortcutTarget.getProbability()); 1216 shortcutTargets.remove(shortcutTarget.mWord); 1217 } 1218 shortcutTargets.put("zzz", updatedShortcutProbability); 1219 shortcutTargets.put("yyy", shortcutProbability); 1220 binaryDictionary.flushWithGC(); 1221 wordProperty = binaryDictionary.getWordProperty("aaa"); 1222 assertEquals(2, wordProperty.mShortcutTargets.size()); 1223 for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 1224 assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); 1225 assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), 1226 shortcutTarget.getProbability()); 1227 shortcutTargets.remove(shortcutTarget.mWord); 1228 } 1229 } 1230 1231 public void testAddManyShortcuts() { 1232 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1233 testAddManyShortcuts(formatVersion); 1234 } 1235 } 1236 1237 private void testAddManyShortcuts(final int formatVersion) { 1238 final long seed = System.currentTimeMillis(); 1239 final Random random = new Random(seed); 1240 final int UNIGRAM_COUNT = 1000; 1241 final int SHORTCUT_COUNT = 10000; 1242 final int codePointSetSize = 20; 1243 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 1244 1245 final ArrayList<String> words = new ArrayList<>(); 1246 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 1247 final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>(); 1248 1249 File dictFile = null; 1250 try { 1251 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1252 } catch (IOException e) { 1253 fail("IOException while writing an initial dictionary : " + e); 1254 } 1255 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1256 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1257 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1258 1259 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1260 final String word = CodePointUtils.generateWord(random, codePointSet); 1261 final int unigramProbability = random.nextInt(0xFF); 1262 addUnigramWord(binaryDictionary, word, unigramProbability); 1263 words.add(word); 1264 unigramProbabilities.put(word, unigramProbability); 1265 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1266 binaryDictionary.flushWithGC(); 1267 } 1268 } 1269 for (int i = 0; i < SHORTCUT_COUNT; i++) { 1270 final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet); 1271 final int shortcutProbability = random.nextInt(0xF); 1272 final String word = words.get(random.nextInt(words.size())); 1273 final int unigramProbability = unigramProbabilities.get(word); 1274 binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget, 1275 shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */, 1276 false /* isBlacklisted */, 0 /* timestamp */); 1277 if (shortcutTargets.containsKey(word)) { 1278 final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word); 1279 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); 1280 } else { 1281 final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>(); 1282 shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability); 1283 shortcutTargets.put(word, shortcutTargetsOfWord); 1284 } 1285 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1286 binaryDictionary.flushWithGC(); 1287 } 1288 } 1289 1290 for (final String word : words) { 1291 final WordProperty wordProperty = binaryDictionary.getWordProperty(word); 1292 assertEquals((int)unigramProbabilities.get(word), 1293 wordProperty.mProbabilityInfo.mProbability); 1294 if (!shortcutTargets.containsKey(word)) { 1295 // The word does not have shortcut targets. 1296 continue; 1297 } 1298 assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size()); 1299 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 1300 final String targetCodePonts = shortcutTarget.mWord; 1301 assertEquals((int)shortcutTargets.get(word).get(targetCodePonts), 1302 shortcutTarget.getProbability()); 1303 } 1304 } 1305 } 1306 1307 public void testDictMigration() { 1308 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1309 testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); 1310 } 1311 } 1312 1313 private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { 1314 File dictFile = null; 1315 try { 1316 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); 1317 } catch (IOException e) { 1318 fail("IOException while writing an initial dictionary : " + e); 1319 } 1320 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1321 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1322 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1323 final int unigramProbability = 100; 1324 addUnigramWord(binaryDictionary, "aaa", unigramProbability); 1325 addUnigramWord(binaryDictionary, "bbb", unigramProbability); 1326 final int bigramProbability = 150; 1327 addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); 1328 final int shortcutProbability = 10; 1329 binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability, 1330 false /* isBeginningOfSentence */, false /* isNotAWord */, 1331 false /* isBlacklisted */, 0 /* timestamp */); 1332 binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */, 1333 Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */, 1334 true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */); 1335 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 1336 assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); 1337 assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); 1338 assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); 1339 assertTrue(binaryDictionary.migrateTo(toFormatVersion)); 1340 assertTrue(binaryDictionary.isValidDictionary()); 1341 assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); 1342 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 1343 assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); 1344 if (canCheckBigramProbability(toFormatVersion)) { 1345 assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb")); 1346 } 1347 assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb")); 1348 WordProperty wordProperty = binaryDictionary.getWordProperty("ccc"); 1349 assertEquals(1, wordProperty.mShortcutTargets.size()); 1350 assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord); 1351 wordProperty = binaryDictionary.getWordProperty("ddd"); 1352 assertTrue(wordProperty.mIsBlacklistEntry); 1353 assertTrue(wordProperty.mIsNotAWord); 1354 } 1355 1356 public void testLargeDictMigration() { 1357 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1358 testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion); 1359 } 1360 } 1361 1362 private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) { 1363 final int UNIGRAM_COUNT = 3000; 1364 final int BIGRAM_COUNT = 3000; 1365 final int codePointSetSize = 50; 1366 final long seed = System.currentTimeMillis(); 1367 final Random random = new Random(seed); 1368 1369 File dictFile = null; 1370 try { 1371 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); 1372 } catch (IOException e) { 1373 fail("IOException while writing an initial dictionary : " + e); 1374 } 1375 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1376 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1377 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1378 1379 final ArrayList<String> words = new ArrayList<>(); 1380 final ArrayList<Pair<String, String>> bigrams = new ArrayList<>(); 1381 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 1382 final HashMap<String, Integer> unigramProbabilities = new HashMap<>(); 1383 final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>(); 1384 1385 for (int i = 0; i < UNIGRAM_COUNT; i++) { 1386 final String word = CodePointUtils.generateWord(random, codePointSet); 1387 final int unigramProbability = random.nextInt(0xFF); 1388 addUnigramWord(binaryDictionary, word, unigramProbability); 1389 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1390 binaryDictionary.flushWithGC(); 1391 } 1392 words.add(word); 1393 unigramProbabilities.put(word, unigramProbability); 1394 } 1395 1396 for (int i = 0; i < BIGRAM_COUNT; i++) { 1397 final int word0Index = random.nextInt(words.size()); 1398 final int word1Index = random.nextInt(words.size()); 1399 if (word0Index == word1Index) { 1400 continue; 1401 } 1402 final String word0 = words.get(word0Index); 1403 final String word1 = words.get(word1Index); 1404 final int unigramProbability = unigramProbabilities.get(word1); 1405 final int bigramProbability = 1406 random.nextInt(0xFF - unigramProbability) + unigramProbability; 1407 addBigramWords(binaryDictionary, word0, word1, bigramProbability); 1408 if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 1409 binaryDictionary.flushWithGC(); 1410 } 1411 final Pair<String, String> bigram = new Pair<>(word0, word1); 1412 bigrams.add(bigram); 1413 bigramProbabilities.put(bigram, bigramProbability); 1414 } 1415 assertTrue(binaryDictionary.migrateTo(toFormatVersion)); 1416 1417 for (final String word : words) { 1418 assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word)); 1419 } 1420 assertEquals(unigramProbabilities.size(), Integer.parseInt( 1421 binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 1422 1423 for (final Pair<String, String> bigram : bigrams) { 1424 if (canCheckBigramProbability(toFormatVersion)) { 1425 assertEquals((int)bigramProbabilities.get(bigram), 1426 getBigramProbability(binaryDictionary, bigram.first, bigram.second)); 1427 } 1428 assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second)); 1429 } 1430 assertEquals(bigramProbabilities.size(), Integer.parseInt( 1431 binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY))); 1432 } 1433 1434 public void testBeginningOfSentence() { 1435 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1436 if (supportsBeginningOfSentence(formatVersion)) { 1437 testBeginningOfSentence(formatVersion); 1438 } 1439 } 1440 } 1441 1442 private void testBeginningOfSentence(final int formatVersion) { 1443 File dictFile = null; 1444 try { 1445 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1446 } catch (IOException e) { 1447 fail("IOException while writing an initial dictionary : " + e); 1448 } 1449 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1450 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1451 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1452 final int dummyProbability = 0; 1453 binaryDictionary.addUnigramEntry("", dummyProbability, "" /* shortcutTarget */, 1454 BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */, 1455 true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */, 1456 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1457 final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE; 1458 final int bigramProbability = 200; 1459 addUnigramWord(binaryDictionary, "aaa", dummyProbability); 1460 binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability, 1461 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1462 assertEquals(bigramProbability, 1463 binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa")); 1464 binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability, 1465 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1466 addUnigramWord(binaryDictionary, "bbb", dummyProbability); 1467 binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", bigramProbability, 1468 BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */); 1469 binaryDictionary.flushWithGC(); 1470 assertEquals(bigramProbability, 1471 binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa")); 1472 assertEquals(bigramProbability, 1473 binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb")); 1474 } 1475 1476 public void testGetMaxFrequencyOfExactMatches() { 1477 for (final int formatVersion : DICT_FORMAT_VERSIONS) { 1478 testGetMaxFrequencyOfExactMatches(formatVersion); 1479 } 1480 } 1481 1482 private void testGetMaxFrequencyOfExactMatches(final int formatVersion) { 1483 File dictFile = null; 1484 try { 1485 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion); 1486 } catch (IOException e) { 1487 fail("IOException while writing an initial dictionary : " + e); 1488 } 1489 final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 1490 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 1491 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 1492 addUnigramWord(binaryDictionary, "abc", 10); 1493 addUnigramWord(binaryDictionary, "aBc", 15); 1494 assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1495 addUnigramWord(binaryDictionary, "ab'c", 20); 1496 assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1497 addUnigramWord(binaryDictionary, "a-b-c", 25); 1498 assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1499 addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30); 1500 assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1501 addUnigramWord(binaryDictionary, "ab c", 255); 1502 assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc")); 1503 } 1504} 1505