BinaryDictDecoderEncoderTests.java revision fa946d4a0f6329134ddbae9a3ce03f6d4009019b
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin.makedict; 18 19import android.test.AndroidTestCase; 20import android.test.MoreAsserts; 21import android.test.suitebuilder.annotation.LargeTest; 22import android.util.Log; 23import android.util.SparseArray; 24 25import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; 26import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; 27import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; 28import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 29import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; 30import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; 31import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; 32import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; 33import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; 34import com.android.inputmethod.latin.utils.CollectionUtils; 35 36import java.io.File; 37import java.io.FileInputStream; 38import java.io.IOException; 39import java.util.ArrayList; 40import java.util.Arrays; 41import java.util.HashMap; 42import java.util.HashSet; 43import java.util.List; 44import java.util.Map.Entry; 45import java.util.Random; 46import java.util.Set; 47import java.util.TreeMap; 48 49/** 50 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils. 51 */ 52@LargeTest 53public class BinaryDictDecoderEncoderTests extends AndroidTestCase { 54 private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName(); 55 private static final int DEFAULT_MAX_UNIGRAMS = 100; 56 private static final int DEFAULT_CODE_POINT_SET_SIZE = 50; 57 private static final int UNIGRAM_FREQ = 10; 58 private static final int BIGRAM_FREQ = 50; 59 private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; 60 private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50; 61 private static final int NUM_OF_SHORTCUTS = 5; 62 63 private static final int USE_BYTE_ARRAY = 1; 64 private static final int USE_BYTE_BUFFER = 2; 65 66 private static final ArrayList<String> sWords = CollectionUtils.newArrayList(); 67 private static final SparseArray<List<Integer>> sEmptyBigrams = 68 CollectionUtils.newSparseArray(); 69 private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray(); 70 private static final SparseArray<List<Integer>> sChainBigrams = 71 CollectionUtils.newSparseArray(); 72 private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap(); 73 74 private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2); 75 private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE = 76 new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */); 77 private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE = 78 new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */); 79 private static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE = 80 new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */); 81 private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE = 82 new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */); 83 84 private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; 85 86 public BinaryDictDecoderEncoderTests() { 87 this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); 88 } 89 90 public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) { 91 super(); 92 Log.e(TAG, "Testing dictionary: seed is " + seed); 93 final Random random = new Random(seed); 94 sWords.clear(); 95 final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, 96 random); 97 generateWords(maxUnigrams, random, codePointSet); 98 99 for (int i = 0; i < sWords.size(); ++i) { 100 sChainBigrams.put(i, new ArrayList<Integer>()); 101 if (i > 0) { 102 sChainBigrams.get(i - 1).add(i); 103 } 104 } 105 106 sStarBigrams.put(0, new ArrayList<Integer>()); 107 // MAX - 1 because we added one above already 108 final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1); 109 for (int i = 1; i < maxBigrams; ++i) { 110 sStarBigrams.get(0).add(i); 111 } 112 113 sShortcuts.clear(); 114 for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) { 115 final int from = Math.abs(random.nextInt()) % sWords.size(); 116 sShortcuts.put(sWords.get(from), new ArrayList<String>()); 117 for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) { 118 final int to = Math.abs(random.nextInt()) % sWords.size(); 119 sShortcuts.get(sWords.get(from)).add(sWords.get(to)); 120 } 121 } 122 } 123 124 private DictEncoder getDictEncoder(final File file, final FormatOptions formatOptions) { 125 if (formatOptions.mVersion == FormatSpec.VERSION4) { 126 return new Ver4DictEncoder(getContext().getCacheDir()); 127 } else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) { 128 return new Ver3DictEncoder(file); 129 } else { 130 throw new RuntimeException("The format option has a wrong version : " 131 + formatOptions.mVersion); 132 } 133 } 134 135 private void generateWords(final int number, final Random random, final int[] codePointSet) { 136 final Set<String> wordSet = CollectionUtils.newHashSet(); 137 while (wordSet.size() < number) { 138 wordSet.add(CodePointUtils.generateWord(random, codePointSet)); 139 } 140 sWords.addAll(wordSet); 141 } 142 143 /** 144 * Adds unigrams to the dictionary. 145 */ 146 private void addUnigrams(final int number, final FusionDictionary dict, 147 final List<String> words, final HashMap<String, List<String>> shortcutMap) { 148 for (int i = 0; i < number; ++i) { 149 final String word = words.get(i); 150 final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); 151 if (shortcutMap != null && shortcutMap.containsKey(word)) { 152 for (final String shortcut : shortcutMap.get(word)) { 153 shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); 154 } 155 } 156 dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts, 157 false /* isNotAWord */); 158 } 159 } 160 161 private void addBigrams(final FusionDictionary dict, 162 final List<String> words, 163 final SparseArray<List<Integer>> bigrams) { 164 for (int i = 0; i < bigrams.size(); ++i) { 165 final int w1 = bigrams.keyAt(i); 166 for (int w2 : bigrams.valueAt(i)) { 167 dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ); 168 } 169 } 170 } 171 172// The following is useful to dump the dictionary into a textual file, but it can't compile 173// on-device, so it's commented out. 174// private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename) 175// throws IOException { 176// com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined( 177// new java.io.FileWriter(new File(filename)), dict); 178// } 179 180 private long timeWritingDictToFile(final File file, final FusionDictionary dict, 181 final FormatSpec.FormatOptions formatOptions) { 182 183 long now = -1, diff = -1; 184 185 try { 186 final DictEncoder dictEncoder = getDictEncoder(file, formatOptions); 187 188 now = System.currentTimeMillis(); 189 // If you need to dump the dict to a textual file, uncomment the line below and the 190 // function above 191 // dumpToCombinedFileForDebug(file, "/tmp/foo"); 192 dictEncoder.writeDictionary(dict, formatOptions); 193 diff = System.currentTimeMillis() - now; 194 } catch (IOException e) { 195 Log.e(TAG, "IO exception while writing file", e); 196 } catch (UnsupportedFormatException e) { 197 Log.e(TAG, "UnsupportedFormatException", e); 198 } 199 200 return diff; 201 } 202 203 private void checkDictionary(final FusionDictionary dict, final List<String> words, 204 final SparseArray<List<Integer>> bigrams, 205 final HashMap<String, List<String>> shortcutMap) { 206 assertNotNull(dict); 207 208 // check unigram 209 for (final String word : words) { 210 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); 211 assertNotNull(ptNode); 212 } 213 214 // check bigram 215 for (int i = 0; i < bigrams.size(); ++i) { 216 final int w1 = bigrams.keyAt(i); 217 for (final int w2 : bigrams.valueAt(i)) { 218 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, 219 words.get(w1)); 220 assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2))); 221 } 222 } 223 224 // check shortcut 225 if (shortcutMap != null) { 226 for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) { 227 assertTrue(words.contains(entry.getKey())); 228 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, 229 entry.getKey()); 230 for (final String word : entry.getValue()) { 231 assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, 232 ptNode.getShortcut(word)); 233 } 234 } 235 } 236 } 237 238 private String outputOptions(final int bufferType, 239 final FormatSpec.FormatOptions formatOptions) { 240 String result = " : buffer type = " 241 + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); 242 result += " : version = " + formatOptions.mVersion; 243 return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate; 244 } 245 246 private DictionaryOptions getDictionaryOptions(final String id, final String version) { 247 final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(), 248 false, false); 249 options.mAttributes.put("version", version); 250 options.mAttributes.put("dictionary", id); 251 return options; 252 } 253 254 private File setUpDictionaryFile(final String name, final String version) { 255 File file = null; 256 try { 257 file = new File(getContext().getCacheDir(), name + "." + version 258 + TEST_DICT_FILE_EXTENSION); 259 file.createNewFile(); 260 } catch (IOException e) { 261 // do nothing 262 } 263 assertTrue("Failed to create the dictionary file.", file.exists()); 264 return file; 265 } 266 267 private DictDecoder getDictDecoder(final File file, final int bufferType, 268 final FormatOptions formatOptions, final DictionaryOptions dictOptions) { 269 if (formatOptions.mVersion == FormatSpec.VERSION4) { 270 final FileHeader header = new FileHeader(0, dictOptions, formatOptions); 271 return FormatSpec.getDictDecoder(new File(getContext().getCacheDir(), 272 header.getId() + "." + header.getVersion()), bufferType); 273 } else { 274 return FormatSpec.getDictDecoder(file, bufferType); 275 } 276 } 277 // Tests for readDictionaryBinary and writeDictionaryBinary 278 279 private long timeReadingAndCheckDict(final File file, final List<String> words, 280 final SparseArray<List<Integer>> bigrams, 281 final HashMap<String, List<String>> shortcutMap, final int bufferType, 282 final FormatOptions formatOptions, final DictionaryOptions dictOptions) { 283 long now, diff = -1; 284 285 FusionDictionary dict = null; 286 try { 287 final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions, 288 dictOptions); 289 now = System.currentTimeMillis(); 290 dict = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */); 291 diff = System.currentTimeMillis() - now; 292 } catch (IOException e) { 293 Log.e(TAG, "IOException while reading dictionary", e); 294 } catch (UnsupportedFormatException e) { 295 Log.e(TAG, "Unsupported format", e); 296 } 297 298 checkDictionary(dict, words, bigrams, shortcutMap); 299 return diff; 300 } 301 302 // Tests for readDictionaryBinary and writeDictionaryBinary 303 private String runReadAndWrite(final List<String> words, 304 final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts, 305 final int bufferType, final FormatSpec.FormatOptions formatOptions, 306 final String message) { 307 308 final String dictName = "runReadAndWrite"; 309 final String dictVersion = Long.toString(System.currentTimeMillis()); 310 final File file = setUpDictionaryFile(dictName, dictVersion); 311 312 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 313 getDictionaryOptions(dictName, dictVersion)); 314 addUnigrams(words.size(), dict, words, shortcuts); 315 addBigrams(dict, words, bigrams); 316 checkDictionary(dict, words, bigrams, shortcuts); 317 318 final long write = timeWritingDictToFile(file, dict, formatOptions); 319 final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType, 320 formatOptions, dict.mOptions); 321 322 return "PROF: read=" + read + "ms, write=" + write + "ms :" + message 323 + " : " + outputOptions(bufferType, formatOptions); 324 } 325 326 private void runReadAndWriteTests(final List<String> results, final int bufferType, 327 final FormatSpec.FormatOptions formatOptions) { 328 results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType, 329 formatOptions, "unigram")); 330 results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType, 331 formatOptions, "chain")); 332 results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType, 333 formatOptions, "star")); 334 results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions, 335 "unigram with shortcuts")); 336 results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions, 337 "chain with shortcuts")); 338 results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions, 339 "star with shortcuts")); 340 } 341 342 // Unit test for CharEncoding.readString and CharEncoding.writeString. 343 public void testCharEncoding() { 344 // the max length of a word in sWords is less than 50. 345 // See generateWords. 346 final byte[] buffer = new byte[50 * 3]; 347 final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); 348 for (final String word : sWords) { 349 Log.d("testReadAndWriteString", "write : " + word); 350 Arrays.fill(buffer, (byte)0); 351 CharEncoding.writeString(buffer, 0, word); 352 dictBuffer.position(0); 353 final String str = CharEncoding.readString(dictBuffer); 354 assertEquals(word, str); 355 } 356 } 357 358 public void testReadAndWriteWithByteBuffer() { 359 final List<String> results = CollectionUtils.newArrayList(); 360 361 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2); 362 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); 363 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); 364 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); 365 runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); 366 367 for (final String result : results) { 368 Log.d(TAG, result); 369 } 370 } 371 372 public void testReadAndWriteWithByteArray() { 373 final List<String> results = CollectionUtils.newArrayList(); 374 375 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2); 376 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); 377 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); 378 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); 379 runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); 380 381 for (final String result : results) { 382 Log.d(TAG, result); 383 } 384 } 385 386 // Tests for readUnigramsAndBigramsBinary 387 388 private void checkWordMap(final List<String> expectedWords, 389 final SparseArray<List<Integer>> expectedBigrams, 390 final TreeMap<Integer, String> resultWords, 391 final TreeMap<Integer, Integer> resultFrequencies, 392 final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams) { 393 // check unigrams 394 final Set<String> actualWordsSet = new HashSet<String>(resultWords.values()); 395 final Set<String> expectedWordsSet = new HashSet<String>(expectedWords); 396 assertEquals(actualWordsSet, expectedWordsSet); 397 398 for (int freq : resultFrequencies.values()) { 399 assertEquals(freq, UNIGRAM_FREQ); 400 } 401 402 // check bigrams 403 final HashMap<String, List<String>> expBigrams = new HashMap<String, List<String>>(); 404 for (int i = 0; i < expectedBigrams.size(); ++i) { 405 final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); 406 for (int w2 : expectedBigrams.valueAt(i)) { 407 if (expBigrams.get(word1) == null) { 408 expBigrams.put(word1, new ArrayList<String>()); 409 } 410 expBigrams.get(word1).add(expectedWords.get(w2)); 411 } 412 } 413 414 final HashMap<String, List<String>> actBigrams = new HashMap<String, List<String>>(); 415 for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { 416 final String word1 = resultWords.get(entry.getKey()); 417 final int unigramFreq = resultFrequencies.get(entry.getKey()); 418 for (PendingAttribute attr : entry.getValue()) { 419 final String word2 = resultWords.get(attr.mAddress); 420 if (actBigrams.get(word1) == null) { 421 actBigrams.put(word1, new ArrayList<String>()); 422 } 423 actBigrams.get(word1).add(word2); 424 425 final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( 426 unigramFreq, attr.mFrequency); 427 assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); 428 } 429 } 430 431 assertEquals(actBigrams, expBigrams); 432 } 433 434 private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, 435 final SparseArray<List<Integer>> bigrams, final int bufferType, 436 final FormatOptions formatOptions, final DictionaryOptions dictOptions) { 437 FileInputStream inStream = null; 438 439 final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap(); 440 final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = 441 CollectionUtils.newTreeMap(); 442 final TreeMap<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); 443 444 long now = -1, diff = -1; 445 try { 446 final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions, 447 dictOptions); 448 now = System.currentTimeMillis(); 449 dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); 450 diff = System.currentTimeMillis() - now; 451 } catch (IOException e) { 452 Log.e(TAG, "IOException", e); 453 } catch (UnsupportedFormatException e) { 454 Log.e(TAG, "UnsupportedFormatException", e); 455 } finally { 456 if (inStream != null) { 457 try { 458 inStream.close(); 459 } catch (IOException e) { 460 // do nothing 461 } 462 } 463 } 464 465 checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams); 466 return diff; 467 } 468 469 private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words, 470 final SparseArray<List<Integer>> bigrams, final int bufferType, 471 final FormatSpec.FormatOptions formatOptions, final String message) { 472 final String dictName = "runReadUnigrams"; 473 final String dictVersion = Long.toString(System.currentTimeMillis()); 474 final File file = setUpDictionaryFile(dictName, dictVersion); 475 476 // making the dictionary from lists of words. 477 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 478 getDictionaryOptions(dictName, dictVersion)); 479 addUnigrams(words.size(), dict, words, null /* shortcutMap */); 480 addBigrams(dict, words, bigrams); 481 482 timeWritingDictToFile(file, dict, formatOptions); 483 484 long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, 485 formatOptions, dict.mOptions); 486 long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, 487 bufferType, formatOptions, dict.mOptions); 488 489 return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap 490 + " : " + message + " : " + outputOptions(bufferType, formatOptions); 491 } 492 493 private void runReadUnigramsAndBigramsTests(final ArrayList<String> results, 494 final int bufferType, final FormatSpec.FormatOptions formatOptions) { 495 results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, 496 formatOptions, "unigram")); 497 results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, 498 formatOptions, "chain")); 499 results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType, 500 formatOptions, "star")); 501 } 502 503 public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { 504 final ArrayList<String> results = CollectionUtils.newArrayList(); 505 506 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2); 507 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); 508 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); 509 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); 510 runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); 511 512 for (final String result : results) { 513 Log.d(TAG, result); 514 } 515 } 516 517 public void testReadUnigramsAndBigramsBinaryWithByteArray() { 518 final ArrayList<String> results = CollectionUtils.newArrayList(); 519 520 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2); 521 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); 522 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); 523 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); 524 runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); 525 526 for (final String result : results) { 527 Log.d(TAG, result); 528 } 529 } 530 531 // Tests for getTerminalPosition 532 private String getWordFromBinary(final DictDecoder dictDecoder, final int address) { 533 if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0); 534 535 FileHeader fileHeader = null; 536 try { 537 fileHeader = dictDecoder.readHeader(); 538 } catch (IOException e) { 539 return null; 540 } catch (UnsupportedFormatException e) { 541 return null; 542 } 543 if (fileHeader == null) return null; 544 return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize, 545 address, fileHeader.mFormatOptions).mWord; 546 } 547 548 private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, 549 int index, boolean contained) { 550 final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; 551 long diff = -1; 552 int position = -1; 553 try { 554 final long now = System.nanoTime(); 555 position = dictDecoder.getTerminalPosition(word); 556 diff = System.nanoTime() - now; 557 } catch (IOException e) { 558 Log.e(TAG, "IOException while getTerminalPosition", e); 559 } catch (UnsupportedFormatException e) { 560 Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e); 561 } 562 563 assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); 564 if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word); 565 return diff; 566 } 567 568 private void runGetTerminalPosition(final ArrayList<String> words, 569 final SparseArray<List<Integer>> bigrams, final int bufferType, 570 final FormatOptions formatOptions, final String message) { 571 final String dictName = "testGetTerminalPosition"; 572 final String dictVersion = Long.toString(System.currentTimeMillis()); 573 final File file = setUpDictionaryFile(dictName, dictVersion); 574 575 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 576 getDictionaryOptions(dictName, dictVersion)); 577 addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); 578 addBigrams(dict, words, bigrams); 579 timeWritingDictToFile(file, dict, formatOptions); 580 581 final DictDecoder dictDecoder = getDictDecoder(file, DictDecoder.USE_BYTEARRAY, 582 formatOptions, dict.mOptions); 583 try { 584 dictDecoder.openDictBuffer(); 585 } catch (IOException e) { 586 // ignore 587 Log.e(TAG, "IOException while opening the buffer", e); 588 } 589 assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); 590 591 try { 592 // too long word 593 final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; 594 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord)); 595 596 // null 597 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null)); 598 599 // empty string 600 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition("")); 601 } catch (IOException e) { 602 } catch (UnsupportedFormatException e) { 603 } 604 605 // Test a word that is contained within the dictionary. 606 long sum = 0; 607 for (int i = 0; i < sWords.size(); ++i) { 608 final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), i, true); 609 sum += time == -1 ? 0 : time; 610 } 611 Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message 612 + " : " + outputOptions(bufferType, formatOptions)); 613 614 // Test a word that isn't contained within the dictionary. 615 final Random random = new Random((int)System.currentTimeMillis()); 616 final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, 617 random); 618 for (int i = 0; i < 1000; ++i) { 619 final String word = CodePointUtils.generateWord(random, codePointSet); 620 if (sWords.indexOf(word) != -1) continue; 621 checkGetTerminalPosition(dictDecoder, word, i, false); 622 } 623 } 624 625 private void runGetTerminalPositionTests(final ArrayList<String> results, final int bufferType, 626 final FormatOptions formatOptions) { 627 runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram"); 628 } 629 630 public void testGetTerminalPosition() { 631 final ArrayList<String> results = CollectionUtils.newArrayList(); 632 633 runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION2); 634 runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE); 635 runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE); 636 runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE); 637 runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE); 638 639 runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION2); 640 runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE); 641 runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE); 642 runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE); 643 runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE); 644 645 for (final String result : results) { 646 Log.d(TAG, result); 647 } 648 } 649 650 public void testDeleteWord() { 651 final String dictName = "testDeleteWord"; 652 final String dictVersion = Long.toString(System.currentTimeMillis()); 653 final File file = setUpDictionaryFile(dictName, dictVersion); 654 655 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 656 new FusionDictionary.DictionaryOptions( 657 new HashMap<String, String>(), false, false)); 658 addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); 659 timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); 660 661 final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file, DictDecoder.USE_BYTEARRAY); 662 try { 663 dictDecoder.openDictBuffer(); 664 } catch (IOException e) { 665 // ignore 666 Log.e(TAG, "IOException while opening the buffer", e); 667 } 668 assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); 669 670 try { 671 MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, 672 dictDecoder.getTerminalPosition(sWords.get(0))); 673 DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(0)); 674 assertEquals(FormatSpec.NOT_VALID_WORD, 675 dictDecoder.getTerminalPosition(sWords.get(0))); 676 677 MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, 678 dictDecoder.getTerminalPosition(sWords.get(5))); 679 DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(5)); 680 assertEquals(FormatSpec.NOT_VALID_WORD, 681 dictDecoder.getTerminalPosition(sWords.get(5))); 682 } catch (IOException e) { 683 } catch (UnsupportedFormatException e) { 684 } 685 } 686} 687