BinaryDictDecoderEncoderTests.java revision 8ffc631826b108423f98e3ff4d987f067cbc4e0c
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin.makedict; 18 19import android.test.AndroidTestCase; 20import android.test.suitebuilder.annotation.LargeTest; 21import android.util.Log; 22import android.util.SparseArray; 23 24import com.android.inputmethod.latin.BinaryDictionary; 25import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; 26import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; 27import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 28import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; 29import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; 30import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; 31import com.android.inputmethod.latin.makedict.UnsupportedFormatException; 32import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; 33import com.android.inputmethod.latin.utils.CollectionUtils; 34 35import java.io.File; 36import java.io.IOException; 37import java.util.ArrayList; 38import java.util.Arrays; 39import java.util.HashMap; 40import java.util.HashSet; 41import java.util.List; 42import java.util.Map.Entry; 43import java.util.Random; 44import java.util.Set; 45import java.util.TreeMap; 46 47/** 48 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils. 49 */ 50@LargeTest 51public class BinaryDictDecoderEncoderTests extends AndroidTestCase { 52 private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName(); 53 private static final int DEFAULT_MAX_UNIGRAMS = 300; 54 private static final int DEFAULT_CODE_POINT_SET_SIZE = 50; 55 private static final int LARGE_CODE_POINT_SET_SIZE = 300; 56 private static final int UNIGRAM_FREQ = 10; 57 private static final int BIGRAM_FREQ = 50; 58 private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; 59 private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50; 60 private static final int NUM_OF_SHORTCUTS = 5; 61 62 private static final ArrayList<String> sWords = CollectionUtils.newArrayList(); 63 private static final ArrayList<String> sWordsWithVariousCodePoints = 64 CollectionUtils.newArrayList(); 65 private static final SparseArray<List<Integer>> sEmptyBigrams = 66 CollectionUtils.newSparseArray(); 67 private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray(); 68 private static final SparseArray<List<Integer>> sChainBigrams = 69 CollectionUtils.newSparseArray(); 70 private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap(); 71 72 public BinaryDictDecoderEncoderTests() { 73 this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); 74 } 75 76 public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) { 77 super(); 78 BinaryDictionary.setCurrentTimeForTest(0); 79 Log.e(TAG, "Testing dictionary: seed is " + seed); 80 final Random random = new Random(seed); 81 sWords.clear(); 82 sWordsWithVariousCodePoints.clear(); 83 generateWords(maxUnigrams, random); 84 85 for (int i = 0; i < sWords.size(); ++i) { 86 sChainBigrams.put(i, new ArrayList<Integer>()); 87 if (i > 0) { 88 sChainBigrams.get(i - 1).add(i); 89 } 90 } 91 92 sStarBigrams.put(0, new ArrayList<Integer>()); 93 // MAX - 1 because we added one above already 94 final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1); 95 for (int i = 1; i < maxBigrams; ++i) { 96 sStarBigrams.get(0).add(i); 97 } 98 99 sShortcuts.clear(); 100 for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) { 101 final int from = Math.abs(random.nextInt()) % sWords.size(); 102 sShortcuts.put(sWords.get(from), new ArrayList<String>()); 103 for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) { 104 final int to = Math.abs(random.nextInt()) % sWords.size(); 105 sShortcuts.get(sWords.get(from)).add(sWords.get(to)); 106 } 107 } 108 } 109 110 @Override 111 protected void setUp() throws Exception { 112 super.setUp(); 113 BinaryDictionary.setCurrentTimeForTest(0); 114 } 115 116 @Override 117 protected void tearDown() throws Exception { 118 super.tearDown(); 119 // Quit test mode. 120 BinaryDictionary.setCurrentTimeForTest(-1); 121 } 122 123 private void generateWords(final int number, final Random random) { 124 final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, 125 random); 126 final Set<String> wordSet = CollectionUtils.newHashSet(); 127 while (wordSet.size() < number) { 128 wordSet.add(CodePointUtils.generateWord(random, codePointSet)); 129 } 130 sWords.addAll(wordSet); 131 132 final int[] largeCodePointSet = CodePointUtils.generateCodePointSet( 133 LARGE_CODE_POINT_SET_SIZE, random); 134 wordSet.clear(); 135 while (wordSet.size() < number) { 136 wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet)); 137 } 138 sWordsWithVariousCodePoints.addAll(wordSet); 139 } 140 141 /** 142 * Adds unigrams to the dictionary. 143 */ 144 private void addUnigrams(final int number, final FusionDictionary dict, 145 final List<String> words, final HashMap<String, List<String>> shortcutMap) { 146 for (int i = 0; i < number; ++i) { 147 final String word = words.get(i); 148 final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList(); 149 if (shortcutMap != null && shortcutMap.containsKey(word)) { 150 for (final String shortcut : shortcutMap.get(word)) { 151 shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); 152 } 153 } 154 dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), 155 (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */); 156 } 157 } 158 159 private void addBigrams(final FusionDictionary dict, 160 final List<String> words, 161 final SparseArray<List<Integer>> bigrams) { 162 for (int i = 0; i < bigrams.size(); ++i) { 163 final int w1 = bigrams.keyAt(i); 164 for (int w2 : bigrams.valueAt(i)) { 165 dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ)); 166 } 167 } 168 } 169 170// The following is useful to dump the dictionary into a textual file, but it can't compile 171// on-device, so it's commented out. 172// private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename) 173// throws IOException { 174// com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined( 175// new java.io.FileWriter(new File(filename)), dict); 176// } 177 178 private long timeWritingDictToFile(final File file, final FusionDictionary dict, 179 final FormatSpec.FormatOptions formatOptions) { 180 181 long now = -1, diff = -1; 182 183 try { 184 final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions); 185 186 now = System.currentTimeMillis(); 187 // If you need to dump the dict to a textual file, uncomment the line below and the 188 // function above 189 // dumpToCombinedFileForDebug(file, "/tmp/foo"); 190 dictEncoder.writeDictionary(dict, formatOptions); 191 diff = System.currentTimeMillis() - now; 192 } catch (IOException e) { 193 Log.e(TAG, "IO exception while writing file", e); 194 } catch (UnsupportedFormatException e) { 195 Log.e(TAG, "UnsupportedFormatException", e); 196 } 197 198 return diff; 199 } 200 201 private void checkDictionary(final FusionDictionary dict, final List<String> words, 202 final SparseArray<List<Integer>> bigrams, 203 final HashMap<String, List<String>> shortcutMap) { 204 assertNotNull(dict); 205 206 // check unigram 207 for (final String word : words) { 208 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); 209 assertNotNull(ptNode); 210 } 211 212 // check bigram 213 for (int i = 0; i < bigrams.size(); ++i) { 214 final int w1 = bigrams.keyAt(i); 215 for (final int w2 : bigrams.valueAt(i)) { 216 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, 217 words.get(w1)); 218 assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2))); 219 } 220 } 221 222 // check shortcut 223 if (shortcutMap != null) { 224 for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) { 225 assertTrue(words.contains(entry.getKey())); 226 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, 227 entry.getKey()); 228 for (final String word : entry.getValue()) { 229 assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, 230 ptNode.getShortcut(word)); 231 } 232 } 233 } 234 } 235 236 private String outputOptions(final int bufferType, 237 final FormatSpec.FormatOptions formatOptions) { 238 String result = " : buffer type = " 239 + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); 240 return result + " : version = " + formatOptions.mVersion; 241 } 242 243 // Tests for readDictionaryBinary and writeDictionaryBinary 244 245 private long timeReadingAndCheckDict(final File file, final List<String> words, 246 final SparseArray<List<Integer>> bigrams, 247 final HashMap<String, List<String>> shortcutMap, final int bufferType) { 248 long now, diff = -1; 249 250 FusionDictionary dict = null; 251 try { 252 final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, bufferType); 253 now = System.currentTimeMillis(); 254 dict = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */); 255 diff = System.currentTimeMillis() - now; 256 } catch (IOException e) { 257 Log.e(TAG, "IOException while reading dictionary", e); 258 } catch (UnsupportedFormatException e) { 259 Log.e(TAG, "Unsupported format", e); 260 } 261 262 checkDictionary(dict, words, bigrams, shortcutMap); 263 return diff; 264 } 265 266 // Tests for readDictionaryBinary and writeDictionaryBinary 267 private String runReadAndWrite(final List<String> words, 268 final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts, 269 final int bufferType, final FormatSpec.FormatOptions formatOptions, 270 final String message) { 271 272 final String dictName = "runReadAndWrite"; 273 final String dictVersion = Long.toString(System.currentTimeMillis()); 274 final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, 275 getContext().getCacheDir()); 276 277 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 278 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); 279 addUnigrams(words.size(), dict, words, shortcuts); 280 addBigrams(dict, words, bigrams); 281 checkDictionary(dict, words, bigrams, shortcuts); 282 283 final long write = timeWritingDictToFile(file, dict, formatOptions); 284 final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType); 285 286 return "PROF: read=" + read + "ms, write=" + write + "ms :" + message 287 + " : " + outputOptions(bufferType, formatOptions); 288 } 289 290 private void runReadAndWriteTests(final List<String> results, final int bufferType, 291 final FormatSpec.FormatOptions formatOptions) { 292 results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType, 293 formatOptions, "unigram")); 294 results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType, 295 formatOptions, "chain")); 296 results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType, 297 formatOptions, "star")); 298 results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions, 299 "unigram with shortcuts")); 300 results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions, 301 "chain with shortcuts")); 302 results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions, 303 "star with shortcuts")); 304 results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams, 305 null /* shortcuts */, bufferType, formatOptions, 306 "unigram with various code points")); 307 } 308 309 // Unit test for CharEncoding.readString and CharEncoding.writeString. 310 public void testCharEncoding() { 311 // the max length of a word in sWords is less than 50. 312 // See generateWords. 313 final byte[] buffer = new byte[50 * 3]; 314 final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); 315 for (final String word : sWords) { 316 Arrays.fill(buffer, (byte) 0); 317 CharEncoding.writeString(buffer, 0, word); 318 dictBuffer.position(0); 319 final String str = CharEncoding.readString(dictBuffer); 320 assertEquals(word, str); 321 } 322 } 323 324 public void testReadAndWriteWithByteBuffer() { 325 final List<String> results = CollectionUtils.newArrayList(); 326 327 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, 328 BinaryDictUtils.VERSION2_OPTIONS); 329 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, 330 BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); 331 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, 332 BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); 333 for (final String result : results) { 334 Log.d(TAG, result); 335 } 336 } 337 338 public void testReadAndWriteWithByteArray() { 339 final List<String> results = CollectionUtils.newArrayList(); 340 341 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, 342 BinaryDictUtils.VERSION2_OPTIONS); 343 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, 344 BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); 345 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, 346 BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); 347 348 for (final String result : results) { 349 Log.d(TAG, result); 350 } 351 } 352 353 // Tests for readUnigramsAndBigramsBinary 354 355 private void checkWordMap(final List<String> expectedWords, 356 final SparseArray<List<Integer>> expectedBigrams, 357 final TreeMap<Integer, String> resultWords, 358 final TreeMap<Integer, Integer> resultFrequencies, 359 final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams, 360 final boolean checkProbability) { 361 // check unigrams 362 final Set<String> actualWordsSet = new HashSet<String>(resultWords.values()); 363 final Set<String> expectedWordsSet = new HashSet<String>(expectedWords); 364 assertEquals(actualWordsSet, expectedWordsSet); 365 if (checkProbability) { 366 for (int freq : resultFrequencies.values()) { 367 assertEquals(freq, UNIGRAM_FREQ); 368 } 369 } 370 371 // check bigrams 372 final HashMap<String, Set<String>> expBigrams = new HashMap<String, Set<String>>(); 373 for (int i = 0; i < expectedBigrams.size(); ++i) { 374 final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); 375 for (int w2 : expectedBigrams.valueAt(i)) { 376 if (expBigrams.get(word1) == null) { 377 expBigrams.put(word1, new HashSet<String>()); 378 } 379 expBigrams.get(word1).add(expectedWords.get(w2)); 380 } 381 } 382 383 final HashMap<String, Set<String>> actBigrams = new HashMap<String, Set<String>>(); 384 for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { 385 final String word1 = resultWords.get(entry.getKey()); 386 final int unigramFreq = resultFrequencies.get(entry.getKey()); 387 for (PendingAttribute attr : entry.getValue()) { 388 final String word2 = resultWords.get(attr.mAddress); 389 if (actBigrams.get(word1) == null) { 390 actBigrams.put(word1, new HashSet<String>()); 391 } 392 actBigrams.get(word1).add(word2); 393 394 if (checkProbability) { 395 final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( 396 unigramFreq, attr.mFrequency); 397 assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); 398 } 399 } 400 } 401 assertEquals(actBigrams, expBigrams); 402 } 403 404 private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, 405 final SparseArray<List<Integer>> bigrams, final int bufferType, 406 final boolean checkProbability) { 407 final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap(); 408 final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = 409 CollectionUtils.newTreeMap(); 410 final TreeMap<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); 411 412 long now = -1, diff = -1; 413 try { 414 final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, bufferType); 415 now = System.currentTimeMillis(); 416 dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); 417 diff = System.currentTimeMillis() - now; 418 } catch (IOException e) { 419 Log.e(TAG, "IOException", e); 420 } catch (UnsupportedFormatException e) { 421 Log.e(TAG, "UnsupportedFormatException", e); 422 } 423 424 checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability); 425 return diff; 426 } 427 428 private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words, 429 final SparseArray<List<Integer>> bigrams, final int bufferType, 430 final FormatSpec.FormatOptions formatOptions, final String message) { 431 final String dictName = "runReadUnigrams"; 432 final String dictVersion = Long.toString(System.currentTimeMillis()); 433 final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, 434 getContext().getCacheDir()); 435 436 // making the dictionary from lists of words. 437 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 438 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); 439 addUnigrams(words.size(), dict, words, null /* shortcutMap */); 440 addBigrams(dict, words, bigrams); 441 442 timeWritingDictToFile(file, dict, formatOptions); 443 444 // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the 445 // probability when there's a timestamp for the entry. 446 // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native. 447 long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, 448 !formatOptions.mHasTimestamp /* checkProbability */); 449 long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, 450 bufferType); 451 452 return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap 453 + " : " + message + " : " + outputOptions(bufferType, formatOptions); 454 } 455 456 private void runReadUnigramsAndBigramsTests(final ArrayList<String> results, 457 final int bufferType, final FormatSpec.FormatOptions formatOptions) { 458 results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, 459 formatOptions, "unigram")); 460 results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, 461 formatOptions, "chain")); 462 results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType, 463 formatOptions, "star")); 464 } 465 466 public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { 467 final ArrayList<String> results = CollectionUtils.newArrayList(); 468 469 runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, 470 BinaryDictUtils.VERSION2_OPTIONS); 471 472 for (final String result : results) { 473 Log.d(TAG, result); 474 } 475 } 476 477 public void testReadUnigramsAndBigramsBinaryWithByteArray() { 478 final ArrayList<String> results = CollectionUtils.newArrayList(); 479 480 runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, 481 BinaryDictUtils.VERSION2_OPTIONS); 482 483 for (final String result : results) { 484 Log.d(TAG, result); 485 } 486 } 487 488 // Tests for getTerminalPosition 489 private String getWordFromBinary(final DictDecoder dictDecoder, final int address) { 490 if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0); 491 492 DictionaryHeader fileHeader = null; 493 try { 494 fileHeader = dictDecoder.readHeader(); 495 } catch (IOException e) { 496 return null; 497 } catch (UnsupportedFormatException e) { 498 return null; 499 } 500 if (fileHeader == null) return null; 501 return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset, 502 address, fileHeader.mFormatOptions).mWord; 503 } 504 505 private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, 506 final boolean contained) { 507 long diff = -1; 508 int position = -1; 509 try { 510 final long now = System.nanoTime(); 511 position = dictDecoder.getTerminalPosition(word); 512 diff = System.nanoTime() - now; 513 } catch (IOException e) { 514 Log.e(TAG, "IOException while getTerminalPosition", e); 515 } catch (UnsupportedFormatException e) { 516 Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e); 517 } 518 519 assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); 520 if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word); 521 return diff; 522 } 523 524 private void runGetTerminalPosition(final ArrayList<String> words, 525 final SparseArray<List<Integer>> bigrams, final int bufferType, 526 final FormatOptions formatOptions, final String message) { 527 final String dictName = "testGetTerminalPosition"; 528 final String dictVersion = Long.toString(System.currentTimeMillis()); 529 final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, 530 getContext().getCacheDir()); 531 532 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 533 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); 534 addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); 535 addBigrams(dict, words, bigrams); 536 timeWritingDictToFile(file, dict, formatOptions); 537 538 final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, DictDecoder.USE_BYTEARRAY); 539 try { 540 dictDecoder.openDictBuffer(); 541 } catch (IOException e) { 542 Log.e(TAG, "IOException while opening the buffer", e); 543 } catch (UnsupportedFormatException e) { 544 Log.e(TAG, "IOException while opening the buffer", e); 545 } 546 assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); 547 548 try { 549 // too long word 550 final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; 551 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord)); 552 553 // null 554 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null)); 555 556 // empty string 557 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition("")); 558 } catch (IOException e) { 559 } catch (UnsupportedFormatException e) { 560 } 561 562 // Test a word that is contained within the dictionary. 563 long sum = 0; 564 for (int i = 0; i < sWords.size(); ++i) { 565 final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true); 566 sum += time == -1 ? 0 : time; 567 } 568 Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message 569 + " : " + outputOptions(bufferType, formatOptions)); 570 571 // Test a word that isn't contained within the dictionary. 572 final Random random = new Random((int)System.currentTimeMillis()); 573 final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, 574 random); 575 for (int i = 0; i < 1000; ++i) { 576 final String word = CodePointUtils.generateWord(random, codePointSet); 577 if (sWords.indexOf(word) != -1) continue; 578 checkGetTerminalPosition(dictDecoder, word, false); 579 } 580 } 581 582 private void runGetTerminalPositionTests(final int bufferType, 583 final FormatOptions formatOptions) { 584 runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram"); 585 } 586 587 public void testGetTerminalPosition() { 588 final ArrayList<String> results = CollectionUtils.newArrayList(); 589 590 runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, 591 BinaryDictUtils.VERSION2_OPTIONS); 592 runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, 593 BinaryDictUtils.VERSION2_OPTIONS); 594 595 for (final String result : results) { 596 Log.d(TAG, result); 597 } 598 } 599} 600