BinaryDictDecoderEncoderTests.java revision 88fa47a27d45f6460971d0d223aa558e121b3478
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.inputmethod.latin.makedict;
18
19import android.test.AndroidTestCase;
20import android.test.suitebuilder.annotation.LargeTest;
21import android.util.Log;
22import android.util.Pair;
23import android.util.SparseArray;
24
25import com.android.inputmethod.latin.BinaryDictionary;
26import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
27import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
28import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
29import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
30import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
31import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
32import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
33
34import java.io.File;
35import java.io.IOException;
36import java.util.ArrayList;
37import java.util.Arrays;
38import java.util.HashMap;
39import java.util.HashSet;
40import java.util.List;
41import java.util.Locale;
42import java.util.Map.Entry;
43import java.util.Random;
44import java.util.Set;
45import java.util.TreeMap;
46
/**
 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
 */
50@LargeTest
51public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
52    private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
53    private static final int DEFAULT_MAX_UNIGRAMS = 300;
54    private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
55    private static final int LARGE_CODE_POINT_SET_SIZE = 300;
56    private static final int UNIGRAM_FREQ = 10;
57    private static final int BIGRAM_FREQ = 50;
58    private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
59    private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
60    private static final int NUM_OF_SHORTCUTS = 5;
61
62    private static final ArrayList<String> sWords = new ArrayList<>();
63    private static final ArrayList<String> sWordsWithVariousCodePoints = new ArrayList<>();
64    private static final SparseArray<List<Integer>> sEmptyBigrams = new SparseArray<>();
65    private static final SparseArray<List<Integer>> sStarBigrams = new SparseArray<>();
66    private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>();
67    private static final HashMap<String, List<String>> sShortcuts = new HashMap<>();
68
69    public BinaryDictDecoderEncoderTests() {
70        this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
71    }
72
73    public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
74        super();
75        BinaryDictionaryUtils.setCurrentTimeForTest(0);
76        Log.e(TAG, "Testing dictionary: seed is " + seed);
77        final Random random = new Random(seed);
78        sWords.clear();
79        sWordsWithVariousCodePoints.clear();
80        generateWords(maxUnigrams, random);
81
82        for (int i = 0; i < sWords.size(); ++i) {
83            sChainBigrams.put(i, new ArrayList<Integer>());
84            if (i > 0) {
85                sChainBigrams.get(i - 1).add(i);
86            }
87        }
88
89        sStarBigrams.put(0, new ArrayList<Integer>());
90        // MAX - 1 because we added one above already
91        final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1);
92        for (int i = 1; i < maxBigrams; ++i) {
93            sStarBigrams.get(0).add(i);
94        }
95
96        sShortcuts.clear();
97        for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
98            final int from = Math.abs(random.nextInt()) % sWords.size();
99            sShortcuts.put(sWords.get(from), new ArrayList<String>());
100            for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
101                final int to = Math.abs(random.nextInt()) % sWords.size();
102                sShortcuts.get(sWords.get(from)).add(sWords.get(to));
103            }
104        }
105    }
106
107    @Override
108    protected void setUp() throws Exception {
109        super.setUp();
110        BinaryDictionaryUtils.setCurrentTimeForTest(0);
111    }
112
113    @Override
114    protected void tearDown() throws Exception {
115        // Quit test mode.
116        BinaryDictionaryUtils.setCurrentTimeForTest(-1);
117        super.tearDown();
118    }
119
120    private void generateWords(final int number, final Random random) {
121        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
122                random);
123        final Set<String> wordSet = new HashSet<>();
124        while (wordSet.size() < number) {
125            wordSet.add(CodePointUtils.generateWord(random, codePointSet));
126        }
127        sWords.addAll(wordSet);
128
129        final int[] largeCodePointSet = CodePointUtils.generateCodePointSet(
130                LARGE_CODE_POINT_SET_SIZE, random);
131        wordSet.clear();
132        while (wordSet.size() < number) {
133            wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet));
134        }
135        sWordsWithVariousCodePoints.addAll(wordSet);
136    }
137
138    /**
139     * Adds unigrams to the dictionary.
140     */
141    private void addUnigrams(final int number, final FusionDictionary dict,
142            final List<String> words, final HashMap<String, List<String>> shortcutMap) {
143        for (int i = 0; i < number; ++i) {
144            final String word = words.get(i);
145            final ArrayList<WeightedString> shortcuts = new ArrayList<>();
146            if (shortcutMap != null && shortcutMap.containsKey(word)) {
147                for (final String shortcut : shortcutMap.get(word)) {
148                    shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ));
149                }
150            }
151            dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ),
152                    (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */);
153        }
154    }
155
156    private void addBigrams(final FusionDictionary dict,
157            final List<String> words,
158            final SparseArray<List<Integer>> bigrams) {
159        for (int i = 0; i < bigrams.size(); ++i) {
160            final int w1 = bigrams.keyAt(i);
161            for (int w2 : bigrams.valueAt(i)) {
162                dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ));
163            }
164        }
165    }
166
//    The following is useful to dump the dictionary into a textual file, but it can't compile
//    on-device, so it's commented out.
//    private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
//            throws IOException {
//        com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
//                new java.io.FileWriter(new File(filename)), dict);
//    }
174
175    private long timeWritingDictToFile(final File file, final FusionDictionary dict,
176            final FormatSpec.FormatOptions formatOptions) {
177
178        long now = -1, diff = -1;
179
180        try {
181            final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
182
183            now = System.currentTimeMillis();
184            // If you need to dump the dict to a textual file, uncomment the line below and the
185            // function above
186            // dumpToCombinedFileForDebug(file, "/tmp/foo");
187            dictEncoder.writeDictionary(dict, formatOptions);
188            diff = System.currentTimeMillis() - now;
189        } catch (IOException e) {
190            Log.e(TAG, "IO exception while writing file", e);
191        } catch (UnsupportedFormatException e) {
192            Log.e(TAG, "UnsupportedFormatException", e);
193        }
194
195        return diff;
196    }
197
198    private void checkDictionary(final FusionDictionary dict, final List<String> words,
199            final SparseArray<List<Integer>> bigrams,
200            final HashMap<String, List<String>> shortcutMap) {
201        assertNotNull(dict);
202
203        // check unigram
204        for (final String word : words) {
205            final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
206            assertNotNull(ptNode);
207        }
208
209        // check bigram
210        for (int i = 0; i < bigrams.size(); ++i) {
211            final int w1 = bigrams.keyAt(i);
212            for (final int w2 : bigrams.valueAt(i)) {
213                final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
214                        words.get(w1));
215                assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2)));
216            }
217        }
218
219        // check shortcut
220        if (shortcutMap != null) {
221            for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) {
222                assertTrue(words.contains(entry.getKey()));
223                final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
224                        entry.getKey());
225                for (final String word : entry.getValue()) {
226                    assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
227                            ptNode.getShortcut(word));
228                }
229            }
230        }
231    }
232
233    private String outputOptions(final int bufferType,
234            final FormatSpec.FormatOptions formatOptions) {
235        String result = " : buffer type = "
236                + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
237        return result + " : version = " + formatOptions.mVersion;
238    }
239
240    // Tests for readDictionaryBinary and writeDictionaryBinary
241
242    private long timeReadingAndCheckDict(final File file, final List<String> words,
243            final SparseArray<List<Integer>> bigrams,
244            final HashMap<String, List<String>> shortcutMap, final int bufferType) {
245        long now, diff = -1;
246
247        FusionDictionary dict = null;
248        try {
249            final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
250                    bufferType);
251            now = System.currentTimeMillis();
252            dict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
253            diff  = System.currentTimeMillis() - now;
254        } catch (IOException e) {
255            Log.e(TAG, "IOException while reading dictionary", e);
256        } catch (UnsupportedFormatException e) {
257            Log.e(TAG, "Unsupported format", e);
258        }
259
260        checkDictionary(dict, words, bigrams, shortcutMap);
261        return diff;
262    }
263
264    // Tests for readDictionaryBinary and writeDictionaryBinary
265    private String runReadAndWrite(final List<String> words,
266            final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts,
267            final int bufferType, final FormatSpec.FormatOptions formatOptions,
268            final String message) {
269
270        final String dictName = "runReadAndWrite";
271        final String dictVersion = Long.toString(System.currentTimeMillis());
272        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
273                getContext().getCacheDir());
274
275        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
276                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
277        addUnigrams(words.size(), dict, words, shortcuts);
278        addBigrams(dict, words, bigrams);
279        checkDictionary(dict, words, bigrams, shortcuts);
280
281        final long write = timeWritingDictToFile(file, dict, formatOptions);
282        final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType);
283
284        return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
285                + " : " + outputOptions(bufferType, formatOptions);
286    }
287
288    private void runReadAndWriteTests(final List<String> results, final int bufferType,
289            final FormatSpec.FormatOptions formatOptions) {
290        results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType,
291                formatOptions, "unigram"));
292        results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType,
293                formatOptions, "chain"));
294        results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
295                formatOptions, "star"));
296        results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
297                "unigram with shortcuts"));
298        results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
299                "chain with shortcuts"));
300        results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
301                "star with shortcuts"));
302        results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams,
303                null /* shortcuts */, bufferType, formatOptions,
304                "unigram with various code points"));
305    }
306
307    // Unit test for CharEncoding.readString and CharEncoding.writeString.
308    public void testCharEncoding() {
309        // the max length of a word in sWords is less than 50.
310        // See generateWords.
311        final byte[] buffer = new byte[50 * 3];
312        final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
313        for (final String word : sWords) {
314            Arrays.fill(buffer, (byte) 0);
315            CharEncoding.writeString(buffer, 0, word);
316            dictBuffer.position(0);
317            final String str = CharEncoding.readString(dictBuffer);
318            assertEquals(word, str);
319        }
320    }
321
322    public void testReadAndWriteWithByteBuffer() {
323        final List<String> results = new ArrayList<>();
324
325        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
326                BinaryDictUtils.VERSION2_OPTIONS);
327        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
328                BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
329        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
330                BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
331        for (final String result : results) {
332            Log.d(TAG, result);
333        }
334    }
335
336    public void testReadAndWriteWithByteArray() {
337        final List<String> results = new ArrayList<>();
338
339        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
340                BinaryDictUtils.VERSION2_OPTIONS);
341        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
342                BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
343        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
344                BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
345
346        for (final String result : results) {
347            Log.d(TAG, result);
348        }
349    }
350
351    // Tests for readUnigramsAndBigramsBinary
352
353    private void checkWordMap(final List<String> expectedWords,
354            final SparseArray<List<Integer>> expectedBigrams,
355            final TreeMap<Integer, String> resultWords,
356            final TreeMap<Integer, Integer> resultFrequencies,
357            final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams,
358            final boolean checkProbability) {
359        // check unigrams
360        final Set<String> actualWordsSet = new HashSet<>(resultWords.values());
361        final Set<String> expectedWordsSet = new HashSet<>(expectedWords);
362        assertEquals(actualWordsSet, expectedWordsSet);
363        if (checkProbability) {
364            for (int freq : resultFrequencies.values()) {
365                assertEquals(freq, UNIGRAM_FREQ);
366            }
367        }
368
369        // check bigrams
370        final HashMap<String, Set<String>> expBigrams = new HashMap<>();
371        for (int i = 0; i < expectedBigrams.size(); ++i) {
372            final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
373            for (int w2 : expectedBigrams.valueAt(i)) {
374                if (expBigrams.get(word1) == null) {
375                    expBigrams.put(word1, new HashSet<String>());
376                }
377                expBigrams.get(word1).add(expectedWords.get(w2));
378            }
379        }
380
381        final HashMap<String, Set<String>> actBigrams = new HashMap<>();
382        for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
383            final String word1 = resultWords.get(entry.getKey());
384            final int unigramFreq = resultFrequencies.get(entry.getKey());
385            for (PendingAttribute attr : entry.getValue()) {
386                final String word2 = resultWords.get(attr.mAddress);
387                if (actBigrams.get(word1) == null) {
388                    actBigrams.put(word1, new HashSet<String>());
389                }
390                actBigrams.get(word1).add(word2);
391
392                if (checkProbability) {
393                    final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
394                            unigramFreq, attr.mFrequency);
395                    assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
396                }
397            }
398        }
399        assertEquals(actBigrams, expBigrams);
400    }
401
402    private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words,
403            final SparseArray<List<Integer>> bigrams, final int bufferType,
404            final boolean checkProbability) {
405        final TreeMap<Integer, String> resultWords = new TreeMap<>();
406        final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = new TreeMap<>();
407        final TreeMap<Integer, Integer> resultFreqs = new TreeMap<>();
408
409        long now = -1, diff = -1;
410        try {
411            final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
412                    bufferType);
413            now = System.currentTimeMillis();
414            dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
415            diff = System.currentTimeMillis() - now;
416        } catch (IOException e) {
417            Log.e(TAG, "IOException", e);
418        } catch (UnsupportedFormatException e) {
419            Log.e(TAG, "UnsupportedFormatException", e);
420        }
421
422        checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability);
423        return diff;
424    }
425
426    private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
427            final SparseArray<List<Integer>> bigrams, final int bufferType,
428            final FormatSpec.FormatOptions formatOptions, final String message) {
429        final String dictName = "runReadUnigrams";
430        final String dictVersion = Long.toString(System.currentTimeMillis());
431        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
432                getContext().getCacheDir());
433
434        // making the dictionary from lists of words.
435        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
436                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
437        addUnigrams(words.size(), dict, words, null /* shortcutMap */);
438        addBigrams(dict, words, bigrams);
439
440        timeWritingDictToFile(file, dict, formatOptions);
441
442        // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the
443        // probability when there's a timestamp for the entry.
444        // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native.
445        long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType,
446                !formatOptions.mHasTimestamp /* checkProbability */);
447        long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */,
448                bufferType);
449
450        return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
451                + " : " + message + " : " + outputOptions(bufferType, formatOptions);
452    }
453
454    private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
455            final int bufferType, final FormatSpec.FormatOptions formatOptions) {
456        results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
457                formatOptions, "unigram"));
458        results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
459                formatOptions, "chain"));
460        results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
461                formatOptions, "star"));
462    }
463
464    public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
465        final ArrayList<String> results = new ArrayList<>();
466
467        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
468                BinaryDictUtils.VERSION2_OPTIONS);
469
470        for (final String result : results) {
471            Log.d(TAG, result);
472        }
473    }
474
475    public void testReadUnigramsAndBigramsBinaryWithByteArray() {
476        final ArrayList<String> results = new ArrayList<>();
477
478        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
479                BinaryDictUtils.VERSION2_OPTIONS);
480
481        for (final String result : results) {
482            Log.d(TAG, result);
483        }
484    }
485
486    // Tests for getTerminalPosition
487    private String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
488        if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);
489
490        DictionaryHeader fileHeader = null;
491        try {
492            fileHeader = dictDecoder.readHeader();
493        } catch (IOException e) {
494            return null;
495        } catch (UnsupportedFormatException e) {
496            return null;
497        }
498        if (fileHeader == null) return null;
499        return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
500                address).mWord;
501    }
502
503    private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
504            final boolean contained) {
505        long diff = -1;
506        int position = -1;
507        try {
508            final long now = System.nanoTime();
509            position = dictDecoder.getTerminalPosition(word);
510            diff = System.nanoTime() - now;
511        } catch (IOException e) {
512            Log.e(TAG, "IOException while getTerminalPosition", e);
513        } catch (UnsupportedFormatException e) {
514            Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
515        }
516
517        assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
518        if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
519        return diff;
520    }
521
522    private void runGetTerminalPosition(final ArrayList<String> words,
523            final SparseArray<List<Integer>> bigrams, final int bufferType,
524            final FormatOptions formatOptions, final String message) {
525        final String dictName = "testGetTerminalPosition";
526        final String dictVersion = Long.toString(System.currentTimeMillis());
527        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
528                getContext().getCacheDir());
529
530        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
531                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
532        addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
533        addBigrams(dict, words, bigrams);
534        timeWritingDictToFile(file, dict, formatOptions);
535
536        final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
537                DictDecoder.USE_BYTEARRAY);
538        try {
539            dictDecoder.openDictBuffer();
540        } catch (IOException e) {
541            Log.e(TAG, "IOException while opening the buffer", e);
542        } catch (UnsupportedFormatException e) {
543            Log.e(TAG, "IOException while opening the buffer", e);
544        }
545        assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
546
547        try {
548            // too long word
549            final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
550            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));
551
552            // null
553            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));
554
555            // empty string
556            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
557        } catch (IOException e) {
558        } catch (UnsupportedFormatException e) {
559        }
560
561        // Test a word that is contained within the dictionary.
562        long sum = 0;
563        for (int i = 0; i < sWords.size(); ++i) {
564            final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true);
565            sum += time == -1 ? 0 : time;
566        }
567        Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message
568                + " : " + outputOptions(bufferType, formatOptions));
569
570        // Test a word that isn't contained within the dictionary.
571        final Random random = new Random((int)System.currentTimeMillis());
572        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
573                random);
574        for (int i = 0; i < 1000; ++i) {
575            final String word = CodePointUtils.generateWord(random, codePointSet);
576            if (sWords.indexOf(word) != -1) continue;
577            checkGetTerminalPosition(dictDecoder, word, false);
578        }
579    }
580
581    private void runGetTerminalPositionTests(final int bufferType,
582            final FormatOptions formatOptions) {
583        runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
584    }
585
586    public void testGetTerminalPosition() {
587        final ArrayList<String> results = new ArrayList<>();
588
589        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
590                BinaryDictUtils.VERSION2_OPTIONS);
591        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
592                BinaryDictUtils.VERSION2_OPTIONS);
593
594        for (final String result : results) {
595            Log.d(TAG, result);
596        }
597    }
598
599    public void testVer2DictGetWordProperty() {
600        final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
601        final ArrayList<String> words = sWords;
602        final HashMap<String, List<String>> shortcuts = sShortcuts;
603        final String dictName = "testGetWordProperty";
604        final String dictVersion = Long.toString(System.currentTimeMillis());
605        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
606                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
607        addUnigrams(words.size(), dict, words, shortcuts);
608        addBigrams(dict, words, sEmptyBigrams);
609        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
610                getContext().getCacheDir());
611        file.delete();
612        timeWritingDictToFile(file, dict, formatOptions);
613        final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
614                0 /* offset */, file.length(), true /* useFullEditDistance */,
615                Locale.ENGLISH, dictName, false /* isUpdatable */);
616        for (final String word : words) {
617            final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
618                    false /* isBeginningOfSentence */);
619            assertEquals(word, wordProperty.mWord);
620            assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
621            if (shortcuts.containsKey(word)) {
622                assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size());
623                final List<String> shortcutList = shortcuts.get(word);
624                assertTrue(wordProperty.mHasShortcuts);
625                for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
626                    assertTrue(shortcutList.contains(shortcutTarget.mWord));
627                    assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
628                    shortcutList.remove(shortcutTarget.mWord);
629                }
630                assertTrue(shortcutList.isEmpty());
631            }
632        }
633    }
634
635    public void testVer2DictIteration() {
636        final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
637        final ArrayList<String> words = sWords;
638        final HashMap<String, List<String>> shortcuts = sShortcuts;
639        final SparseArray<List<Integer>> bigrams = sEmptyBigrams;
640        final String dictName = "testGetWordProperty";
641        final String dictVersion = Long.toString(System.currentTimeMillis());
642        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
643                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
644        addUnigrams(words.size(), dict, words, shortcuts);
645        addBigrams(dict, words, bigrams);
646        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
647                getContext().getCacheDir());
648        timeWritingDictToFile(file, dict, formatOptions);
649        Log.d(TAG, file.getAbsolutePath());
650        final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
651                0 /* offset */, file.length(), true /* useFullEditDistance */,
652                Locale.ENGLISH, dictName, false /* isUpdatable */);
653
654        final HashSet<String> wordSet = new HashSet<>(words);
655        final HashSet<Pair<String, String>> bigramSet = new HashSet<>();
656
657        for (int i = 0; i < words.size(); i++) {
658            final List<Integer> bigramList = bigrams.get(i);
659            if (bigramList != null) {
660                for (final Integer word1Index : bigramList) {
661                    final String word1 = words.get(word1Index);
662                    bigramSet.add(new Pair<>(words.get(i), word1));
663                }
664            }
665        }
666        int token = 0;
667        do {
668            final BinaryDictionary.GetNextWordPropertyResult result =
669                    binaryDictionary.getNextWordProperty(token);
670            final WordProperty wordProperty = result.mWordProperty;
671            final String word0 = wordProperty.mWord;
672            assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability);
673            wordSet.remove(word0);
674            if (shortcuts.containsKey(word0)) {
675                assertEquals(shortcuts.get(word0).size(), wordProperty.mShortcutTargets.size());
676                final List<String> shortcutList = shortcuts.get(word0);
677                assertNotNull(wordProperty.mShortcutTargets);
678                for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
679                    assertTrue(shortcutList.contains(shortcutTarget.mWord));
680                    assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
681                    shortcutList.remove(shortcutTarget.mWord);
682                }
683                assertTrue(shortcutList.isEmpty());
684            }
685            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
686                final String word1 = wordProperty.mBigrams.get(j).mWord;
687                final Pair<String, String> bigram = new Pair<>(word0, word1);
688                assertTrue(bigramSet.contains(bigram));
689                bigramSet.remove(bigram);
690            }
691            token = result.mNextToken;
692        } while (token != 0);
693        assertTrue(wordSet.isEmpty());
694        assertTrue(bigramSet.isEmpty());
695    }
696}
697