BinaryDictionaryTests.java revision b698e9c1fab9df8e1cd58f997ad62147522538fc
1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.test.AndroidTestCase; 20import android.test.suitebuilder.annotation.LargeTest; 21import android.util.Pair; 22 23import com.android.inputmethod.latin.makedict.CodePointUtils; 24import com.android.inputmethod.latin.makedict.FormatSpec; 25 26import java.io.File; 27import java.io.IOException; 28import java.util.ArrayList; 29import java.util.HashMap; 30import java.util.HashSet; 31import java.util.Locale; 32import java.util.Map; 33import java.util.Random; 34 35@LargeTest 36public class BinaryDictionaryTests extends AndroidTestCase { 37 private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 38 private static final String TEST_LOCALE = "test"; 39 40 @Override 41 protected void setUp() throws Exception { 42 super.setUp(); 43 } 44 45 @Override 46 protected void tearDown() throws Exception { 47 super.tearDown(); 48 } 49 50 private File createEmptyDictionaryAndGetFile(final String filename) throws IOException { 51 final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION, 52 getContext().getCacheDir()); 53 Map<String, String> attributeMap = new HashMap<String, String>(); 54 attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, 55 FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); 56 if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), 57 3 /* dictVersion */, attributeMap)) { 58 return file; 59 } else { 60 throw new IOException("Empty dictionary cannot be created."); 61 } 62 } 63 64 public void testIsValidDictionary() { 65 File dictFile = null; 66 try { 67 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 68 } catch (IOException e) { 69 fail("IOException while writing an initial dictionary : " + e); 70 } 71 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 72 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 73 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 74 assertTrue("binaryDictionary must be valid for existing valid dictionary file.", 75 binaryDictionary.isValidDictionary()); 76 binaryDictionary.close(); 77 assertFalse("binaryDictionary must be invalid after closing.", 78 binaryDictionary.isValidDictionary()); 79 dictFile.delete(); 80 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */, 81 dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), 82 TEST_LOCALE, true /* isUpdatable */); 83 assertFalse("binaryDictionary must be invalid for not existing dictionary file.", 84 binaryDictionary.isValidDictionary()); 85 binaryDictionary.close(); 86 } 87 88 public void testAddUnigramWord() { 89 File dictFile = null; 90 try { 91 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 92 } catch (IOException e) { 93 fail("IOException while writing an initial dictionary : " + e); 94 } 95 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 96 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 97 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 98 99 final int probability = 100; 100 binaryDictionary.addUnigramWord("aaa", probability); 101 // Reallocate and create. 102 binaryDictionary.addUnigramWord("aab", probability); 103 // Insert into children. 104 binaryDictionary.addUnigramWord("aac", probability); 105 // Make terminal. 106 binaryDictionary.addUnigramWord("aa", probability); 107 // Create children. 108 binaryDictionary.addUnigramWord("aaaa", probability); 109 // Reallocate and make termianl. 110 binaryDictionary.addUnigramWord("a", probability); 111 112 final int updatedProbability = 200; 113 // Update. 114 binaryDictionary.addUnigramWord("aaa", updatedProbability); 115 116 assertEquals(probability, binaryDictionary.getFrequency("aab")); 117 assertEquals(probability, binaryDictionary.getFrequency("aac")); 118 assertEquals(probability, binaryDictionary.getFrequency("aa")); 119 assertEquals(probability, binaryDictionary.getFrequency("aaaa")); 120 assertEquals(probability, binaryDictionary.getFrequency("a")); 121 assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa")); 122 123 dictFile.delete(); 124 } 125 126 public void testRandomlyAddUnigramWord() { 127 final int wordCount = 1000; 128 final int codePointSetSize = 50; 129 final int seed = 123456789; 130 131 File dictFile = null; 132 try { 133 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 134 } catch (IOException e) { 135 fail("IOException while writing an initial dictionary : " + e); 136 } 137 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 138 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 139 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 140 141 final HashMap<String, Integer> probabilityMap = new HashMap<String, Integer>(); 142 // Test a word that isn't contained within the dictionary. 143 final Random random = new Random(seed); 144 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 145 for (int i = 0; i < wordCount; ++i) { 146 final String word = CodePointUtils.generateWord(random, codePointSet); 147 probabilityMap.put(word, random.nextInt(0xFF)); 148 } 149 for (String word : probabilityMap.keySet()) { 150 binaryDictionary.addUnigramWord(word, probabilityMap.get(word)); 151 } 152 for (String word : probabilityMap.keySet()) { 153 assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word)); 154 } 155 dictFile.delete(); 156 } 157 158 public void testAddBigramWords() { 159 File dictFile = null; 160 try { 161 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 162 } catch (IOException e) { 163 fail("IOException while writing an initial dictionary : " + e); 164 } 165 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 166 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 167 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 168 169 final int unigramProbability = 100; 170 final int bigramProbability = 10; 171 final int updatedBigramProbability = 15; 172 binaryDictionary.addUnigramWord("aaa", unigramProbability); 173 binaryDictionary.addUnigramWord("abb", unigramProbability); 174 binaryDictionary.addUnigramWord("bcc", unigramProbability); 175 binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); 176 binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); 177 binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); 178 binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); 179 180 final int probability = binaryDictionary.calculateProbability(unigramProbability, 181 bigramProbability); 182 assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); 183 assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); 184 assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); 185 assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); 186 assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); 187 assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); 188 assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); 189 assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); 190 191 binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability); 192 final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability, 193 updatedBigramProbability); 194 assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb")); 195 196 assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); 197 assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); 198 assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); 199 assertEquals(Dictionary.NOT_A_PROBABILITY, 200 binaryDictionary.getBigramProbability("bcc", "aaa")); 201 assertEquals(Dictionary.NOT_A_PROBABILITY, 202 binaryDictionary.getBigramProbability("bcc", "bbc")); 203 assertEquals(Dictionary.NOT_A_PROBABILITY, 204 binaryDictionary.getBigramProbability("aaa", "aaa")); 205 206 // Testing bigram link. 207 binaryDictionary.addUnigramWord("abcde", unigramProbability); 208 binaryDictionary.addUnigramWord("fghij", unigramProbability); 209 binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability); 210 binaryDictionary.addUnigramWord("fgh", unigramProbability); 211 binaryDictionary.addUnigramWord("abc", unigramProbability); 212 binaryDictionary.addUnigramWord("f", unigramProbability); 213 assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij")); 214 assertEquals(Dictionary.NOT_A_PROBABILITY, 215 binaryDictionary.getBigramProbability("abcde", "fgh")); 216 binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability); 217 assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij")); 218 219 dictFile.delete(); 220 } 221 222 public void testRandomlyAddBigramWords() { 223 final int wordCount = 100; 224 final int bigramCount = 1000; 225 final int codePointSetSize = 50; 226 final int seed = 11111; 227 228 File dictFile = null; 229 try { 230 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 231 } catch (IOException e) { 232 fail("IOException while writing an initial dictionary : " + e); 233 } 234 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 235 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 236 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 237 final ArrayList<String> words = new ArrayList<String>(); 238 // Test a word that isn't contained within the dictionary. 239 final Random random = new Random(seed); 240 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 241 final int[] unigramProbabilities = new int[wordCount]; 242 for (int i = 0; i < wordCount; ++i) { 243 final String word = CodePointUtils.generateWord(random, codePointSet); 244 words.add(word); 245 final int unigramProbability = random.nextInt(0xFF); 246 unigramProbabilities[i] = unigramProbability; 247 binaryDictionary.addUnigramWord(word, unigramProbability); 248 } 249 250 final int[][] probabilities = new int[wordCount][wordCount]; 251 252 for (int i = 0; i < wordCount; ++i) { 253 for (int j = 0; j < wordCount; ++j) { 254 probabilities[i][j] = Dictionary.NOT_A_PROBABILITY; 255 } 256 } 257 258 for (int i = 0; i < bigramCount; i++) { 259 final int word0Index = random.nextInt(wordCount); 260 final int word1Index = random.nextInt(wordCount); 261 final String word0 = words.get(word0Index); 262 final String word1 = words.get(word1Index); 263 final int bigramProbability = random.nextInt(0xF); 264 probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability( 265 unigramProbabilities[word1Index], bigramProbability); 266 binaryDictionary.addBigramWords(word0, word1, bigramProbability); 267 } 268 269 for (int i = 0; i < words.size(); i++) { 270 for (int j = 0; j < words.size(); j++) { 271 assertEquals(probabilities[i][j], 272 binaryDictionary.getBigramProbability(words.get(i), words.get(j))); 273 } 274 } 275 276 dictFile.delete(); 277 } 278 279 public void testRemoveBigramWords() { 280 File dictFile = null; 281 try { 282 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 283 } catch (IOException e) { 284 fail("IOException while writing an initial dictionary : " + e); 285 } 286 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 287 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 288 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 289 final int unigramProbability = 100; 290 final int bigramProbability = 10; 291 binaryDictionary.addUnigramWord("aaa", unigramProbability); 292 binaryDictionary.addUnigramWord("abb", unigramProbability); 293 binaryDictionary.addUnigramWord("bcc", unigramProbability); 294 binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); 295 binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); 296 binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); 297 binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); 298 299 assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); 300 assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc")); 301 assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa")); 302 assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc")); 303 304 binaryDictionary.removeBigramWords("aaa", "abb"); 305 assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb")); 306 binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); 307 assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb")); 308 309 310 binaryDictionary.removeBigramWords("aaa", "bcc"); 311 assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc")); 312 binaryDictionary.removeBigramWords("abb", "aaa"); 313 assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa")); 314 binaryDictionary.removeBigramWords("abb", "bcc"); 315 assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc")); 316 317 binaryDictionary.removeBigramWords("aaa", "abb"); 318 // Test remove non-existing bigram operation. 319 binaryDictionary.removeBigramWords("aaa", "abb"); 320 binaryDictionary.removeBigramWords("bcc", "aaa"); 321 322 dictFile.delete(); 323 } 324 325 public void testFlushDictionary() { 326 File dictFile = null; 327 try { 328 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 329 } catch (IOException e) { 330 fail("IOException while writing an initial dictionary : " + e); 331 } 332 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 333 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 334 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 335 336 final int probability = 100; 337 binaryDictionary.addUnigramWord("aaa", probability); 338 binaryDictionary.addUnigramWord("abcd", probability); 339 // Close without flushing. 340 binaryDictionary.close(); 341 342 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 343 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 344 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 345 346 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); 347 assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); 348 349 binaryDictionary.addUnigramWord("aaa", probability); 350 binaryDictionary.addUnigramWord("abcd", probability); 351 binaryDictionary.flush(); 352 binaryDictionary.close(); 353 354 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 355 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 356 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 357 358 assertEquals(probability, binaryDictionary.getFrequency("aaa")); 359 assertEquals(probability, binaryDictionary.getFrequency("abcd")); 360 binaryDictionary.addUnigramWord("bcde", probability); 361 binaryDictionary.flush(); 362 binaryDictionary.close(); 363 364 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 365 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 366 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 367 assertEquals(probability, binaryDictionary.getFrequency("bcde")); 368 binaryDictionary.close(); 369 370 dictFile.delete(); 371 } 372 373 public void testFlushWithGCDictionary() { 374 File dictFile = null; 375 try { 376 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 377 } catch (IOException e) { 378 fail("IOException while writing an initial dictionary : " + e); 379 } 380 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 381 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 382 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 383 384 final int unigramProbability = 100; 385 final int bigramProbability = 10; 386 binaryDictionary.addUnigramWord("aaa", unigramProbability); 387 binaryDictionary.addUnigramWord("abb", unigramProbability); 388 binaryDictionary.addUnigramWord("bcc", unigramProbability); 389 binaryDictionary.addBigramWords("aaa", "abb", bigramProbability); 390 binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability); 391 binaryDictionary.addBigramWords("abb", "aaa", bigramProbability); 392 binaryDictionary.addBigramWords("abb", "bcc", bigramProbability); 393 binaryDictionary.flushWithGC(); 394 binaryDictionary.close(); 395 396 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 397 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 398 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 399 final int probability = binaryDictionary.calculateProbability(unigramProbability, 400 bigramProbability); 401 assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); 402 assertEquals(unigramProbability, binaryDictionary.getFrequency("abb")); 403 assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc")); 404 assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb")); 405 assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc")); 406 assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa")); 407 assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc")); 408 assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa")); 409 assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc")); 410 assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa")); 411 binaryDictionary.flushWithGC(); 412 binaryDictionary.close(); 413 414 dictFile.delete(); 415 } 416 417 // TODO: Evaluate performance of GC 418 public void testAddBigramWordsAndFlashWithGC() { 419 final int wordCount = 100; 420 final int bigramCount = 1000; 421 final int codePointSetSize = 30; 422 // TODO: Use various seeds such as a current timestamp to make this test more random. 423 final int seed = 314159265; 424 425 File dictFile = null; 426 try { 427 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 428 } catch (IOException e) { 429 fail("IOException while writing an initial dictionary : " + e); 430 } 431 432 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 433 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 434 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 435 final ArrayList<String> words = new ArrayList<String>(); 436 // Test a word that isn't contained within the dictionary. 437 final Random random = new Random(seed); 438 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 439 final int[] unigramProbabilities = new int[wordCount]; 440 for (int i = 0; i < wordCount; ++i) { 441 final String word = CodePointUtils.generateWord(random, codePointSet); 442 words.add(word); 443 final int unigramProbability = random.nextInt(0xFF); 444 unigramProbabilities[i] = unigramProbability; 445 binaryDictionary.addUnigramWord(word, unigramProbability); 446 } 447 448 final int[][] probabilities = new int[wordCount][wordCount]; 449 450 for (int i = 0; i < wordCount; ++i) { 451 for (int j = 0; j < wordCount; ++j) { 452 probabilities[i][j] = Dictionary.NOT_A_PROBABILITY; 453 } 454 } 455 456 for (int i = 0; i < bigramCount; i++) { 457 final int word0Index = random.nextInt(wordCount); 458 final int word1Index = random.nextInt(wordCount); 459 final String word0 = words.get(word0Index); 460 final String word1 = words.get(word1Index); 461 final int bigramProbability = random.nextInt(0xF); 462 probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability( 463 unigramProbabilities[word1Index], bigramProbability); 464 binaryDictionary.addBigramWords(word0, word1, bigramProbability); 465 } 466 467 binaryDictionary.flushWithGC(); 468 binaryDictionary.close(); 469 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 470 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 471 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 472 473 for (int i = 0; i < words.size(); i++) { 474 for (int j = 0; j < words.size(); j++) { 475 assertEquals(probabilities[i][j], 476 binaryDictionary.getBigramProbability(words.get(i), words.get(j))); 477 } 478 } 479 dictFile.delete(); 480 } 481 482 public void testRandomOperetionsAndFlashWithGC() { 483 final int flashWithGCIterationCount = 50; 484 final int operationCountInEachIteration = 200; 485 final int initialUnigramCount = 100; 486 final float addUnigramProb = 0.5f; 487 final float addBigramProb = 0.8f; 488 final float removeBigramProb = 0.2f; 489 final int codePointSetSize = 30; 490 final int seed = 141421356; 491 492 final Random random = new Random(seed); 493 494 File dictFile = null; 495 try { 496 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 497 } catch (IOException e) { 498 fail("IOException while writing an initial dictionary : " + e); 499 } 500 501 BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 502 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 503 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 504 final ArrayList<String> words = new ArrayList<String>(); 505 final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>(); 506 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 507 final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); 508 final HashMap<Pair<String, String>, Integer> bigramProbabilities = 509 new HashMap<Pair<String, String>, Integer>(); 510 for (int i = 0; i < initialUnigramCount; ++i) { 511 final String word = CodePointUtils.generateWord(random, codePointSet); 512 words.add(word); 513 final int unigramProbability = random.nextInt(0xFF); 514 unigramProbabilities.put(word, unigramProbability); 515 binaryDictionary.addUnigramWord(word, unigramProbability); 516 } 517 binaryDictionary.flushWithGC(); 518 binaryDictionary.close(); 519 520 for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) { 521 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 522 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 523 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 524 for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) { 525 // Add unigram. 526 if (random.nextFloat() < addUnigramProb) { 527 final String word = CodePointUtils.generateWord(random, codePointSet); 528 words.add(word); 529 final int unigramProbability = random.nextInt(0xFF); 530 unigramProbabilities.put(word, unigramProbability); 531 binaryDictionary.addUnigramWord(word, unigramProbability); 532 } 533 // Add bigram. 534 if (random.nextFloat() < addBigramProb && words.size() > 2) { 535 final int word0Index = random.nextInt(words.size()); 536 int word1Index = random.nextInt(words.size() - 1); 537 if (word0Index <= word1Index) { 538 word1Index++; 539 } 540 final String word0 = words.get(word0Index); 541 final String word1 = words.get(word1Index); 542 final int bigramProbability = random.nextInt(0xF); 543 final Pair<String, String> bigram = new Pair<String, String>(word0, word1); 544 bigramWords.add(bigram); 545 bigramProbabilities.put(bigram, bigramProbability); 546 binaryDictionary.addBigramWords(word0, word1, bigramProbability); 547 } 548 // Remove bigram. 549 if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) { 550 final int bigramIndex = random.nextInt(bigramWords.size()); 551 final Pair<String, String> bigram = bigramWords.get(bigramIndex); 552 bigramWords.remove(bigramIndex); 553 bigramProbabilities.remove(bigram); 554 binaryDictionary.removeBigramWords(bigram.first, bigram.second); 555 } 556 } 557 558 // Test whether the all unigram operations are collectlly handled. 559 for (int i = 0; i < words.size(); i++) { 560 final String word = words.get(i); 561 final int unigramProbability = unigramProbabilities.get(word); 562 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 563 } 564 // Test whether the all bigram operations are collectlly handled. 565 for (int i = 0; i < bigramWords.size(); i++) { 566 final Pair<String, String> bigram = bigramWords.get(i); 567 final int unigramProbability = unigramProbabilities.get(bigram.second); 568 final int probability; 569 if (bigramProbabilities.containsKey(bigram)) { 570 final int bigramProbability = bigramProbabilities.get(bigram); 571 probability = binaryDictionary.calculateProbability(unigramProbability, 572 bigramProbability); 573 } else { 574 probability = Dictionary.NOT_A_PROBABILITY; 575 } 576 assertEquals(probability, 577 binaryDictionary.getBigramProbability(bigram.first, bigram.second)); 578 } 579 binaryDictionary.flushWithGC(); 580 binaryDictionary.close(); 581 } 582 583 dictFile.delete(); 584 } 585 586 public void testAddManyUnigramsAndFlushWithGC() { 587 final int flashWithGCIterationCount = 3; 588 final int codePointSetSize = 50; 589 final int seed = 22360679; 590 591 final Random random = new Random(seed); 592 593 File dictFile = null; 594 try { 595 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 596 } catch (IOException e) { 597 fail("IOException while writing an initial dictionary : " + e); 598 } 599 600 final ArrayList<String> words = new ArrayList<String>(); 601 final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>(); 602 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 603 604 BinaryDictionary binaryDictionary; 605 for (int i = 0; i < flashWithGCIterationCount; i++) { 606 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 607 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 608 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 609 while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) { 610 final String word = CodePointUtils.generateWord(random, codePointSet); 611 words.add(word); 612 final int unigramProbability = random.nextInt(0xFF); 613 unigramProbabilities.put(word, unigramProbability); 614 binaryDictionary.addUnigramWord(word, unigramProbability); 615 } 616 617 for (int j = 0; j < words.size(); j++) { 618 final String word = words.get(j); 619 final int unigramProbability = unigramProbabilities.get(word); 620 assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word)); 621 } 622 623 binaryDictionary.flushWithGC(); 624 binaryDictionary.close(); 625 } 626 627 dictFile.delete(); 628 } 629 630 public void testUnigramAndBigramCount() { 631 final int flashWithGCIterationCount = 10; 632 final int codePointSetSize = 50; 633 final int unigramCountPerIteration = 1000; 634 final int bigramCountPerIteration = 2000; 635 final int seed = 1123581321; 636 637 final Random random = new Random(seed); 638 639 File dictFile = null; 640 try { 641 dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary"); 642 } catch (IOException e) { 643 fail("IOException while writing an initial dictionary : " + e); 644 } 645 646 final ArrayList<String> words = new ArrayList<String>(); 647 final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>(); 648 final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random); 649 650 BinaryDictionary binaryDictionary; 651 for (int i = 0; i < flashWithGCIterationCount; i++) { 652 binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 653 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, 654 Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); 655 for (int j = 0; j < unigramCountPerIteration; j++) { 656 final String word = CodePointUtils.generateWord(random, codePointSet); 657 words.add(word); 658 final int unigramProbability = random.nextInt(0xFF); 659 binaryDictionary.addUnigramWord(word, unigramProbability); 660 } 661 for (int j = 0; j < bigramCountPerIteration; j++) { 662 final String word0 = words.get(random.nextInt(words.size())); 663 final String word1 = words.get(random.nextInt(words.size())); 664 bigrams.add(new Pair<String, String>(word0, word1)); 665 final int bigramProbability = random.nextInt(0xF); 666 binaryDictionary.addBigramWords(word0, word1, bigramProbability); 667 } 668 assertEquals(new HashSet<String>(words).size(), Integer.parseInt( 669 binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 670 assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( 671 binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); 672 binaryDictionary.flushWithGC(); 673 assertEquals(new HashSet<String>(words).size(), Integer.parseInt( 674 binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY))); 675 assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt( 676 binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY))); 677 binaryDictionary.close(); 678 } 679 680 dictFile.delete(); 681 } 682} 683