BinaryDictDecoderEncoderTests.java revision 107a5f6fb81a91a98fecd4c291aabb421e963291
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin.makedict; 18 19import android.test.AndroidTestCase; 20import android.test.MoreAsserts; 21import android.test.suitebuilder.annotation.LargeTest; 22import android.util.Log; 23import android.util.SparseArray; 24 25import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; 26import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; 27import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; 28import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; 29import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; 30import com.android.inputmethod.latin.utils.CollectionUtils; 31 32import java.io.File; 33import java.io.FileInputStream; 34import java.io.FileNotFoundException; 35import java.io.FileOutputStream; 36import java.io.IOException; 37import java.util.ArrayList; 38import java.util.HashMap; 39import java.util.HashSet; 40import java.util.List; 41import java.util.Map; 42import java.util.Map.Entry; 43import java.util.Random; 44import java.util.Set; 45 46/** 47 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoder. 48 */ 49@LargeTest 50public class BinaryDictDecoderEncoderTests extends AndroidTestCase { 51 private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName(); 52 private static final int DEFAULT_MAX_UNIGRAMS = 100; 53 private static final int DEFAULT_CODE_POINT_SET_SIZE = 50; 54 private static final int UNIGRAM_FREQ = 10; 55 private static final int BIGRAM_FREQ = 50; 56 private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; 57 58 private static final int USE_BYTE_ARRAY = 1; 59 private static final int USE_BYTE_BUFFER = 2; 60 61 private static final List<String> sWords = CollectionUtils.newArrayList(); 62 private static final SparseArray<List<Integer>> sEmptyBigrams = 63 CollectionUtils.newSparseArray(); 64 private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray(); 65 private static final SparseArray<List<Integer>> sChainBigrams = 66 CollectionUtils.newSparseArray(); 67 68 private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); 69 private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = 70 new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */); 71 private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE = 72 new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */); 73 74 private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 75 76 public BinaryDictDecoderEncoderTests() { 77 this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); 78 } 79 80 public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) { 81 super(); 82 Log.e(TAG, "Testing dictionary: seed is " + seed); 83 final Random random = new Random(seed); 84 sWords.clear(); 85 final int[] codePointSet = generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, random); 86 generateWords(maxUnigrams, random, codePointSet); 87 88 for (int i = 0; i < sWords.size(); ++i) { 89 sChainBigrams.put(i, new ArrayList<Integer>()); 90 if (i > 0) { 91 sChainBigrams.get(i - 1).add(i); 92 } 93 } 94 95 sStarBigrams.put(0, new ArrayList<Integer>()); 96 for (int i = 1; i < sWords.size(); ++i) { 97 sStarBigrams.get(0).add(i); 98 } 99 } 100 101 private int[] generateCodePointSet(final int codePointSetSize, final Random random) { 102 final int[] codePointSet = new int[codePointSetSize]; 103 for (int i = codePointSet.length - 1; i >= 0; ) { 104 final int r = Math.abs(random.nextInt()); 105 if (r < 0) continue; 106 // Don't insert 0~0x20, but insert any other code point. 107 // Code points are in the range 0~0x10FFFF. 108 final int candidateCodePoint = (int)(0x20 + r % (Character.MAX_CODE_POINT - 0x20)); 109 // Code points between MIN_ and MAX_SURROGATE are not valid on their own. 110 if (candidateCodePoint >= Character.MIN_SURROGATE 111 && candidateCodePoint <= Character.MAX_SURROGATE) continue; 112 codePointSet[i] = candidateCodePoint; 113 --i; 114 } 115 return codePointSet; 116 } 117 118 // Utilities for test 119 120 /** 121 * Makes new DictBuffer according to BUFFER_TYPE. 122 */ 123 private void getDictBuffer(final Ver3DictDecoder dictDecoder, final int bufferType) 124 throws FileNotFoundException, IOException { 125 if (bufferType == USE_BYTE_BUFFER) { 126 dictDecoder.openDictBuffer( 127 new Ver3DictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); 128 } else if (bufferType == USE_BYTE_ARRAY) { 129 dictDecoder.openDictBuffer( 130 new Ver3DictDecoder.DictionaryBufferFromByteArrayFactory()); 131 } 132 } 133 134 /** 135 * Generates a random word. 136 */ 137 private String generateWord(final Random random, final int[] codePointSet) { 138 StringBuilder builder = new StringBuilder(); 139 // 8 * 4 = 32 chars max, but we do it the following way so as to bias the random toward 140 // longer words. This should be closer to natural language, and more importantly, it will 141 // exercise the algorithms in dicttool much more. 142 final int count = 1 + (Math.abs(random.nextInt()) % 5) 143 + (Math.abs(random.nextInt()) % 5) 144 + (Math.abs(random.nextInt()) % 5) 145 + (Math.abs(random.nextInt()) % 5) 146 + (Math.abs(random.nextInt()) % 5) 147 + (Math.abs(random.nextInt()) % 5) 148 + (Math.abs(random.nextInt()) % 5) 149 + (Math.abs(random.nextInt()) % 5); 150 while (builder.length() < count) { 151 builder.appendCodePoint(codePointSet[Math.abs(random.nextInt()) % codePointSet.length]); 152 } 153 return builder.toString(); 154 } 155 156 private void generateWords(final int number, final Random random, final int[] codePointSet) { 157 final Set<String> wordSet = CollectionUtils.newHashSet(); 158 while (wordSet.size() < number) { 159 wordSet.add(generateWord(random, codePointSet)); 160 } 161 sWords.addAll(wordSet); 162 } 163 164 /** 165 * Adds unigrams to the dictionary. 166 */ 167 private void addUnigrams(final int number, final FusionDictionary dict, 168 final List<String> words, final Map<String, List<String>> shortcutMap) { 169 for (int i = 0; i < number; ++i) { 170 final String word = words.get(i); 171 final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); 172 if (shortcutMap != null && shortcutMap.containsKey(word)) { 173 for (final String shortcut : shortcutMap.get(word)) { 174 shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); 175 } 176 } 177 dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts, 178 false /* isNotAWord */); 179 } 180 } 181 182 private void addBigrams(final FusionDictionary dict, 183 final List<String> words, 184 final SparseArray<List<Integer>> bigrams) { 185 for (int i = 0; i < bigrams.size(); ++i) { 186 final int w1 = bigrams.keyAt(i); 187 for (int w2 : bigrams.valueAt(i)) { 188 dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ); 189 } 190 } 191 } 192 193// The following is useful to dump the dictionary into a textual file, but it can't compile 194// on-device, so it's commented out. 195// private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename) 196// throws IOException { 197// com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined( 198// new java.io.FileWriter(new File(filename)), dict); 199// } 200 201 private long timeWritingDictToFile(final File file, final FusionDictionary dict, 202 final FormatSpec.FormatOptions formatOptions) { 203 204 long now = -1, diff = -1; 205 206 try { 207 final FileOutputStream out = new FileOutputStream(file); 208 209 now = System.currentTimeMillis(); 210 // If you need to dump the dict to a textual file, uncomment the line below and the 211 // function above 212 // dumpToCombinedFileForDebug(file, "/tmp/foo"); 213 BinaryDictEncoder.writeDictionaryBinary(out, dict, formatOptions); 214 diff = System.currentTimeMillis() - now; 215 216 out.flush(); 217 out.close(); 218 } catch (IOException e) { 219 Log.e(TAG, "IO exception while writing file", e); 220 } catch (UnsupportedFormatException e) { 221 Log.e(TAG, "UnsupportedFormatException", e); 222 } 223 224 return diff; 225 } 226 227 private void checkDictionary(final FusionDictionary dict, final List<String> words, 228 final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap) { 229 assertNotNull(dict); 230 231 // check unigram 232 for (final String word : words) { 233 final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); 234 assertNotNull(cg); 235 } 236 237 // check bigram 238 for (int i = 0; i < bigrams.size(); ++i) { 239 final int w1 = bigrams.keyAt(i); 240 for (final int w2 : bigrams.valueAt(i)) { 241 final CharGroup cg = FusionDictionary.findWordInTree(dict.mRootNodeArray, 242 words.get(w1)); 243 assertNotNull(words.get(w1) + "," + words.get(w2), cg.getBigram(words.get(w2))); 244 } 245 } 246 247 // check shortcut 248 if (shortcutMap != null) { 249 for (final Map.Entry<String, List<String>> entry : shortcutMap.entrySet()) { 250 final CharGroup group = FusionDictionary.findWordInTree(dict.mRootNodeArray, 251 entry.getKey()); 252 for (final String word : entry.getValue()) { 253 assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, 254 group.getShortcut(word)); 255 } 256 } 257 } 258 } 259 260 private String outputOptions(final int bufferType, 261 final FormatSpec.FormatOptions formatOptions) { 262 String result = " : buffer type = " 263 + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); 264 result += " : version = " + formatOptions.mVersion; 265 return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate; 266 } 267 268 // Tests for readDictionaryBinary and writeDictionaryBinary 269 270 private long timeReadingAndCheckDict(final File file, final List<String> words, 271 final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap, 272 final int bufferType) { 273 long now, diff = -1; 274 final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file); 275 276 FusionDictionary dict = null; 277 try { 278 getDictBuffer(dictDecoder, bufferType); 279 assertNotNull(dictDecoder.getDictBuffer()); 280 now = System.currentTimeMillis(); 281 dict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null); 282 diff = System.currentTimeMillis() - now; 283 } catch (IOException e) { 284 Log.e(TAG, "IOException while reading dictionary", e); 285 } catch (UnsupportedFormatException e) { 286 Log.e(TAG, "Unsupported format", e); 287 } 288 289 checkDictionary(dict, words, bigrams, shortcutMap); 290 return diff; 291 } 292 293 // Tests for readDictionaryBinary and writeDictionaryBinary 294 private String runReadAndWrite(final List<String> words, 295 final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcuts, 296 final int bufferType, final FormatSpec.FormatOptions formatOptions, 297 final String message) { 298 File file = null; 299 try { 300 file = File.createTempFile("runReadAndWrite", TEST_DICT_FILE_EXTENSION, 301 getContext().getCacheDir()); 302 } catch (IOException e) { 303 Log.e(TAG, "IOException", e); 304 } 305 assertNotNull(file); 306 307 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 308 new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false)); 309 addUnigrams(words.size(), dict, words, shortcuts); 310 addBigrams(dict, words, bigrams); 311 checkDictionary(dict, words, bigrams, shortcuts); 312 313 final long write = timeWritingDictToFile(file, dict, formatOptions); 314 final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType); 315 316 return "PROF: read=" + read + "ms, write=" + write + "ms :" + message 317 + " : " + outputOptions(bufferType, formatOptions); 318 } 319 320 private void runReadAndWriteTests(final List<String> results, final int bufferType, 321 final FormatSpec.FormatOptions formatOptions) { 322 results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType, 323 formatOptions, "unigram")); 324 results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType, 325 formatOptions, "chain")); 326 results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType, 327 formatOptions, "star")); 328 } 329 330 public void testReadAndWriteWithByteBuffer() { 331 final List<String> results = CollectionUtils.newArrayList(); 332 333 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2); 334 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); 335 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); 336 337 for (final String result : results) { 338 Log.d(TAG, result); 339 } 340 } 341 342 public void testReadAndWriteWithByteArray() { 343 final List<String> results = CollectionUtils.newArrayList(); 344 345 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2); 346 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); 347 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); 348 349 for (final String result : results) { 350 Log.d(TAG, result); 351 } 352 } 353 354 // Tests for readUnigramsAndBigramsBinary 355 356 private void checkWordMap(final List<String> expectedWords, 357 final SparseArray<List<Integer>> expectedBigrams, 358 final Map<Integer, String> resultWords, 359 final Map<Integer, Integer> resultFrequencies, 360 final Map<Integer, ArrayList<PendingAttribute>> resultBigrams) { 361 // check unigrams 362 final Set<String> actualWordsSet = new HashSet<String>(resultWords.values()); 363 final Set<String> expectedWordsSet = new HashSet<String>(expectedWords); 364 assertEquals(actualWordsSet, expectedWordsSet); 365 366 for (int freq : resultFrequencies.values()) { 367 assertEquals(freq, UNIGRAM_FREQ); 368 } 369 370 // check bigrams 371 final Map<String, List<String>> expBigrams = new HashMap<String, List<String>>(); 372 for (int i = 0; i < expectedBigrams.size(); ++i) { 373 final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); 374 for (int w2 : expectedBigrams.valueAt(i)) { 375 if (expBigrams.get(word1) == null) { 376 expBigrams.put(word1, new ArrayList<String>()); 377 } 378 expBigrams.get(word1).add(expectedWords.get(w2)); 379 } 380 } 381 382 final Map<String, List<String>> actBigrams = new HashMap<String, List<String>>(); 383 for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { 384 final String word1 = resultWords.get(entry.getKey()); 385 final int unigramFreq = resultFrequencies.get(entry.getKey()); 386 for (PendingAttribute attr : entry.getValue()) { 387 final String word2 = resultWords.get(attr.mAddress); 388 if (actBigrams.get(word1) == null) { 389 actBigrams.put(word1, new ArrayList<String>()); 390 } 391 actBigrams.get(word1).add(word2); 392 393 final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( 394 unigramFreq, attr.mFrequency); 395 assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); 396 } 397 } 398 399 assertEquals(actBigrams, expBigrams); 400 } 401 402 private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, 403 final SparseArray<List<Integer>> bigrams, final int bufferType) { 404 FileInputStream inStream = null; 405 406 final Map<Integer, String> resultWords = CollectionUtils.newTreeMap(); 407 final Map<Integer, ArrayList<PendingAttribute>> resultBigrams = 408 CollectionUtils.newTreeMap(); 409 final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); 410 411 long now = -1, diff = -1; 412 final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file); 413 try { 414 getDictBuffer(dictDecoder, bufferType); 415 assertNotNull("Can't get buffer.", dictDecoder.getDictBuffer()); 416 now = System.currentTimeMillis(); 417 BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, resultWords, resultFreqs, 418 resultBigrams); 419 diff = System.currentTimeMillis() - now; 420 } catch (IOException e) { 421 Log.e(TAG, "IOException", e); 422 } catch (UnsupportedFormatException e) { 423 Log.e(TAG, "UnsupportedFormatException", e); 424 } finally { 425 if (inStream != null) { 426 try { 427 inStream.close(); 428 } catch (IOException e) { 429 // do nothing 430 } 431 } 432 } 433 434 checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); 435 return diff; 436 } 437 438 private String runReadUnigramsAndBigramsBinary(final List<String> words, 439 final SparseArray<List<Integer>> bigrams, final int bufferType, 440 final FormatSpec.FormatOptions formatOptions, final String message) { 441 File file = null; 442 try { 443 file = File.createTempFile("runReadUnigrams", TEST_DICT_FILE_EXTENSION, 444 getContext().getCacheDir()); 445 } catch (IOException e) { 446 Log.e(TAG, "IOException", e); 447 } 448 assertNotNull(file); 449 450 // making the dictionary from lists of words. 451 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 452 new FusionDictionary.DictionaryOptions( 453 new HashMap<String, String>(), false, false)); 454 addUnigrams(words.size(), dict, words, null /* shortcutMap */); 455 addBigrams(dict, words, bigrams); 456 457 timeWritingDictToFile(file, dict, formatOptions); 458 459 long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType); 460 long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, 461 bufferType); 462 463 return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap 464 + " : " + message + " : " + outputOptions(bufferType, formatOptions); 465 } 466 467 private void runReadUnigramsAndBigramsTests(final List<String> results, final int bufferType, 468 final FormatSpec.FormatOptions formatOptions) { 469 results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, 470 formatOptions, "unigram")); 471 results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, 472 formatOptions, "chain")); 473 results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, 474 formatOptions, "star")); 475 } 476 477 public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { 478 final List<String> results = CollectionUtils.newArrayList(); 479 480 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2); 481 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); 482 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); 483 484 for (final String result : results) { 485 Log.d(TAG, result); 486 } 487 } 488 489 public void testReadUnigramsAndBigramsBinaryWithByteArray() { 490 final List<String> results = CollectionUtils.newArrayList(); 491 492 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2); 493 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); 494 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); 495 496 for (final String result : results) { 497 Log.d(TAG, result); 498 } 499 } 500 501 // Tests for getTerminalPosition 502 private String getWordFromBinary(final Ver3DictDecoder dictDecoder, final int address) { 503 final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); 504 if (dictBuffer.position() != 0) dictBuffer.position(0); 505 506 FileHeader fileHeader = null; 507 try { 508 fileHeader = dictDecoder.readHeader(); 509 } catch (IOException e) { 510 return null; 511 } catch (UnsupportedFormatException e) { 512 return null; 513 } 514 if (fileHeader == null) return null; 515 return BinaryDictDecoderUtils.getWordAtAddress(dictDecoder, fileHeader.mHeaderSize, 516 address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord; 517 } 518 519 private long runGetTerminalPosition(final Ver3DictDecoder dictDecoder, final String word, 520 int index, boolean contained) { 521 final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; 522 long diff = -1; 523 int position = -1; 524 try { 525 final long now = System.nanoTime(); 526 position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word); 527 diff = System.nanoTime() - now; 528 } catch (IOException e) { 529 Log.e(TAG, "IOException while getTerminalPosition", e); 530 } catch (UnsupportedFormatException e) { 531 Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e); 532 } 533 534 assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); 535 if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word); 536 return diff; 537 } 538 539 public void testGetTerminalPosition() { 540 File file = null; 541 try { 542 file = File.createTempFile("testGetTerminalPosition", TEST_DICT_FILE_EXTENSION, 543 getContext().getCacheDir()); 544 } catch (IOException e) { 545 // do nothing 546 } 547 assertNotNull(file); 548 549 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 550 new FusionDictionary.DictionaryOptions( 551 new HashMap<String, String>(), false, false)); 552 addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); 553 timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); 554 555 final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file); 556 try { 557 dictDecoder.openDictBuffer( 558 new Ver3DictDecoder.DictionaryBufferFromByteArrayFactory()); 559 } catch (IOException e) { 560 // ignore 561 Log.e(TAG, "IOException while opening the buffer", e); 562 } 563 assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer()); 564 565 try { 566 // too long word 567 final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; 568 assertEquals(FormatSpec.NOT_VALID_WORD, 569 BinaryDictIOUtils.getTerminalPosition(dictDecoder, longWord)); 570 571 // null 572 assertEquals(FormatSpec.NOT_VALID_WORD, 573 BinaryDictIOUtils.getTerminalPosition(dictDecoder, null)); 574 575 // empty string 576 assertEquals(FormatSpec.NOT_VALID_WORD, 577 BinaryDictIOUtils.getTerminalPosition(dictDecoder, "")); 578 } catch (IOException e) { 579 } catch (UnsupportedFormatException e) { 580 } 581 582 // Test a word that is contained within the dictionary. 583 long sum = 0; 584 for (int i = 0; i < sWords.size(); ++i) { 585 final long time = runGetTerminalPosition(dictDecoder, sWords.get(i), i, true); 586 sum += time == -1 ? 0 : time; 587 } 588 Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000)); 589 590 // Test a word that isn't contained within the dictionary. 591 final Random random = new Random((int)System.currentTimeMillis()); 592 final int[] codePointSet = generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, random); 593 for (int i = 0; i < 1000; ++i) { 594 final String word = generateWord(random, codePointSet); 595 if (sWords.indexOf(word) != -1) continue; 596 runGetTerminalPosition(dictDecoder, word, i, false); 597 } 598 } 599 600 public void testDeleteWord() { 601 File file = null; 602 try { 603 file = File.createTempFile("testDeleteWord", TEST_DICT_FILE_EXTENSION, 604 getContext().getCacheDir()); 605 } catch (IOException e) { 606 // do nothing 607 } 608 assertNotNull(file); 609 610 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 611 new FusionDictionary.DictionaryOptions( 612 new HashMap<String, String>(), false, false)); 613 addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); 614 timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); 615 616 final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file); 617 try { 618 dictDecoder.openDictBuffer( 619 new Ver3DictDecoder.DictionaryBufferFromByteArrayFactory()); 620 } catch (IOException e) { 621 // ignore 622 Log.e(TAG, "IOException while opening the buffer", e); 623 } 624 assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer()); 625 626 try { 627 MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, 628 BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0))); 629 DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(0)); 630 assertEquals(FormatSpec.NOT_VALID_WORD, 631 BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0))); 632 633 MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, 634 BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5))); 635 DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(5)); 636 assertEquals(FormatSpec.NOT_VALID_WORD, 637 BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5))); 638 } catch (IOException e) { 639 } catch (UnsupportedFormatException e) { 640 } 641 } 642} 643