BinaryDictDecoderEncoderTests.java revision 8ffc631826b108423f98e3ff4d987f067cbc4e0c
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.makedict;
18
19import android.test.AndroidTestCase;
20import android.test.suitebuilder.annotation.LargeTest;
21import android.util.Log;
22import android.util.SparseArray;
23
24import com.android.inputmethod.latin.BinaryDictionary;
25import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
26import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
27import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
28import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
29import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
30import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
31import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
32import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
33import com.android.inputmethod.latin.utils.CollectionUtils;
34
35import java.io.File;
36import java.io.IOException;
37import java.util.ArrayList;
38import java.util.Arrays;
39import java.util.HashMap;
40import java.util.HashSet;
41import java.util.List;
42import java.util.Map.Entry;
43import java.util.Random;
44import java.util.Set;
45import java.util.TreeMap;
46
47/**
48 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
49 */
50@LargeTest
51public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
    // Tag used for every log line emitted by this test class.
    private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
    // Number of words generated for each word list by the no-argument constructor.
    private static final int DEFAULT_MAX_UNIGRAMS = 300;
    // Size of the code point set the words of sWords are drawn from.
    private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
    // Size of the code point set the words of sWordsWithVariousCodePoints are drawn from.
    private static final int LARGE_CODE_POINT_SET_SIZE = 300;
    // Probability given to every unigram, and to every shortcut target, added by addUnigrams.
    private static final int UNIGRAM_FREQ = 10;
    // Probability given to every bigram added by addBigrams.
    private static final int BIGRAM_FREQ = 50;
    // A reconstructed bigram frequency must differ from BIGRAM_FREQ by less than this value.
    private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
    // Number of random draws of a shortcut source word; a word drawn twice is only kept once.
    private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
    // Number of shortcut targets drawn for each shortcut source word.
    private static final int NUM_OF_SHORTCUTS = 5;

    // Words generated from the default code point set; filled by the constructor.
    private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
    // Words generated from the large code point set; filled by the constructor.
    private static final ArrayList<String> sWordsWithVariousCodePoints =
            CollectionUtils.newArrayList();
    // Bigram fixture without any entry. Keys and values of the bigram fixtures are word indices.
    private static final SparseArray<List<Integer>> sEmptyBigrams =
            CollectionUtils.newSparseArray();
    // Bigram fixture in which word 0 is followed by many other words.
    private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray();
    // Bigram fixture in which word i - 1 is followed by word i.
    private static final SparseArray<List<Integer>> sChainBigrams =
            CollectionUtils.newSparseArray();
    // Maps a source word to the list of its shortcut target words.
    private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap();
71
    /**
     * Creates the test case with the current time as the random seed and
     * {@code DEFAULT_MAX_UNIGRAMS} words per word list.
     */
    public BinaryDictDecoderEncoderTests() {
        this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
    }
75
76    public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
77        super();
78        BinaryDictionary.setCurrentTimeForTest(0);
79        Log.e(TAG, "Testing dictionary: seed is " + seed);
80        final Random random = new Random(seed);
81        sWords.clear();
82        sWordsWithVariousCodePoints.clear();
83        generateWords(maxUnigrams, random);
84
85        for (int i = 0; i < sWords.size(); ++i) {
86            sChainBigrams.put(i, new ArrayList<Integer>());
87            if (i > 0) {
88                sChainBigrams.get(i - 1).add(i);
89            }
90        }
91
92        sStarBigrams.put(0, new ArrayList<Integer>());
93        // MAX - 1 because we added one above already
94        final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1);
95        for (int i = 1; i < maxBigrams; ++i) {
96            sStarBigrams.get(0).add(i);
97        }
98
99        sShortcuts.clear();
100        for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
101            final int from = Math.abs(random.nextInt()) % sWords.size();
102            sShortcuts.put(sWords.get(from), new ArrayList<String>());
103            for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
104                final int to = Math.abs(random.nextInt()) % sWords.size();
105                sShortcuts.get(sWords.get(from)).add(sWords.get(to));
106            }
107        }
108    }
109
110    @Override
111    protected void setUp() throws Exception {
112        super.setUp();
113        BinaryDictionary.setCurrentTimeForTest(0);
114    }
115
116    @Override
117    protected void tearDown() throws Exception {
118        super.tearDown();
119        // Quit test mode.
120        BinaryDictionary.setCurrentTimeForTest(-1);
121    }
122
123    private void generateWords(final int number, final Random random) {
124        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
125                random);
126        final Set<String> wordSet = CollectionUtils.newHashSet();
127        while (wordSet.size() < number) {
128            wordSet.add(CodePointUtils.generateWord(random, codePointSet));
129        }
130        sWords.addAll(wordSet);
131
132        final int[] largeCodePointSet = CodePointUtils.generateCodePointSet(
133                LARGE_CODE_POINT_SET_SIZE, random);
134        wordSet.clear();
135        while (wordSet.size() < number) {
136            wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet));
137        }
138        sWordsWithVariousCodePoints.addAll(wordSet);
139    }
140
141    /**
142     * Adds unigrams to the dictionary.
143     */
144    private void addUnigrams(final int number, final FusionDictionary dict,
145            final List<String> words, final HashMap<String, List<String>> shortcutMap) {
146        for (int i = 0; i < number; ++i) {
147            final String word = words.get(i);
148            final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
149            if (shortcutMap != null && shortcutMap.containsKey(word)) {
150                for (final String shortcut : shortcutMap.get(word)) {
151                    shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ));
152                }
153            }
154            dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ),
155                    (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */);
156        }
157    }
158
159    private void addBigrams(final FusionDictionary dict,
160            final List<String> words,
161            final SparseArray<List<Integer>> bigrams) {
162        for (int i = 0; i < bigrams.size(); ++i) {
163            final int w1 = bigrams.keyAt(i);
164            for (int w2 : bigrams.valueAt(i)) {
165                dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ));
166            }
167        }
168    }
169
170//    The following is useful to dump the dictionary into a textual file, but it can't compile
171//    on-device, so it's commented out.
172//    private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
173//            throws IOException {
174//        com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
175//                new java.io.FileWriter(new File(filename)), dict);
176//    }
177
178    private long timeWritingDictToFile(final File file, final FusionDictionary dict,
179            final FormatSpec.FormatOptions formatOptions) {
180
181        long now = -1, diff = -1;
182
183        try {
184            final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
185
186            now = System.currentTimeMillis();
187            // If you need to dump the dict to a textual file, uncomment the line below and the
188            // function above
189            // dumpToCombinedFileForDebug(file, "/tmp/foo");
190            dictEncoder.writeDictionary(dict, formatOptions);
191            diff = System.currentTimeMillis() - now;
192        } catch (IOException e) {
193            Log.e(TAG, "IO exception while writing file", e);
194        } catch (UnsupportedFormatException e) {
195            Log.e(TAG, "UnsupportedFormatException", e);
196        }
197
198        return diff;
199    }
200
201    private void checkDictionary(final FusionDictionary dict, final List<String> words,
202            final SparseArray<List<Integer>> bigrams,
203            final HashMap<String, List<String>> shortcutMap) {
204        assertNotNull(dict);
205
206        // check unigram
207        for (final String word : words) {
208            final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
209            assertNotNull(ptNode);
210        }
211
212        // check bigram
213        for (int i = 0; i < bigrams.size(); ++i) {
214            final int w1 = bigrams.keyAt(i);
215            for (final int w2 : bigrams.valueAt(i)) {
216                final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
217                        words.get(w1));
218                assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2)));
219            }
220        }
221
222        // check shortcut
223        if (shortcutMap != null) {
224            for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) {
225                assertTrue(words.contains(entry.getKey()));
226                final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
227                        entry.getKey());
228                for (final String word : entry.getValue()) {
229                    assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
230                            ptNode.getShortcut(word));
231                }
232            }
233        }
234    }
235
236    private String outputOptions(final int bufferType,
237            final FormatSpec.FormatOptions formatOptions) {
238        String result = " : buffer type = "
239                + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
240        return result + " : version = " + formatOptions.mVersion;
241    }
242
243    // Tests for readDictionaryBinary and writeDictionaryBinary
244
245    private long timeReadingAndCheckDict(final File file, final List<String> words,
246            final SparseArray<List<Integer>> bigrams,
247            final HashMap<String, List<String>> shortcutMap, final int bufferType) {
248        long now, diff = -1;
249
250        FusionDictionary dict = null;
251        try {
252            final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, bufferType);
253            now = System.currentTimeMillis();
254            dict = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */);
255            diff  = System.currentTimeMillis() - now;
256        } catch (IOException e) {
257            Log.e(TAG, "IOException while reading dictionary", e);
258        } catch (UnsupportedFormatException e) {
259            Log.e(TAG, "Unsupported format", e);
260        }
261
262        checkDictionary(dict, words, bigrams, shortcutMap);
263        return diff;
264    }
265
266    // Tests for readDictionaryBinary and writeDictionaryBinary
267    private String runReadAndWrite(final List<String> words,
268            final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts,
269            final int bufferType, final FormatSpec.FormatOptions formatOptions,
270            final String message) {
271
272        final String dictName = "runReadAndWrite";
273        final String dictVersion = Long.toString(System.currentTimeMillis());
274        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
275                getContext().getCacheDir());
276
277        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
278                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
279        addUnigrams(words.size(), dict, words, shortcuts);
280        addBigrams(dict, words, bigrams);
281        checkDictionary(dict, words, bigrams, shortcuts);
282
283        final long write = timeWritingDictToFile(file, dict, formatOptions);
284        final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType);
285
286        return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
287                + " : " + outputOptions(bufferType, formatOptions);
288    }
289
290    private void runReadAndWriteTests(final List<String> results, final int bufferType,
291            final FormatSpec.FormatOptions formatOptions) {
292        results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType,
293                formatOptions, "unigram"));
294        results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType,
295                formatOptions, "chain"));
296        results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
297                formatOptions, "star"));
298        results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
299                "unigram with shortcuts"));
300        results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
301                "chain with shortcuts"));
302        results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
303                "star with shortcuts"));
304        results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams,
305                null /* shortcuts */, bufferType, formatOptions,
306                "unigram with various code points"));
307    }
308
309    // Unit test for CharEncoding.readString and CharEncoding.writeString.
310    public void testCharEncoding() {
311        // the max length of a word in sWords is less than 50.
312        // See generateWords.
313        final byte[] buffer = new byte[50 * 3];
314        final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
315        for (final String word : sWords) {
316            Arrays.fill(buffer, (byte) 0);
317            CharEncoding.writeString(buffer, 0, word);
318            dictBuffer.position(0);
319            final String str = CharEncoding.readString(dictBuffer);
320            assertEquals(word, str);
321        }
322    }
323
324    public void testReadAndWriteWithByteBuffer() {
325        final List<String> results = CollectionUtils.newArrayList();
326
327        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
328                BinaryDictUtils.VERSION2_OPTIONS);
329        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
330                BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
331        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
332                BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
333        for (final String result : results) {
334            Log.d(TAG, result);
335        }
336    }
337
338    public void testReadAndWriteWithByteArray() {
339        final List<String> results = CollectionUtils.newArrayList();
340
341        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
342                BinaryDictUtils.VERSION2_OPTIONS);
343        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
344                BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
345        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
346                BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
347
348        for (final String result : results) {
349            Log.d(TAG, result);
350        }
351    }
352
353    // Tests for readUnigramsAndBigramsBinary
354
355    private void checkWordMap(final List<String> expectedWords,
356            final SparseArray<List<Integer>> expectedBigrams,
357            final TreeMap<Integer, String> resultWords,
358            final TreeMap<Integer, Integer> resultFrequencies,
359            final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams,
360            final boolean checkProbability) {
361        // check unigrams
362        final Set<String> actualWordsSet = new HashSet<String>(resultWords.values());
363        final Set<String> expectedWordsSet = new HashSet<String>(expectedWords);
364        assertEquals(actualWordsSet, expectedWordsSet);
365        if (checkProbability) {
366            for (int freq : resultFrequencies.values()) {
367                assertEquals(freq, UNIGRAM_FREQ);
368            }
369        }
370
371        // check bigrams
372        final HashMap<String, Set<String>> expBigrams = new HashMap<String, Set<String>>();
373        for (int i = 0; i < expectedBigrams.size(); ++i) {
374            final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
375            for (int w2 : expectedBigrams.valueAt(i)) {
376                if (expBigrams.get(word1) == null) {
377                    expBigrams.put(word1, new HashSet<String>());
378                }
379                expBigrams.get(word1).add(expectedWords.get(w2));
380            }
381        }
382
383        final HashMap<String, Set<String>> actBigrams = new HashMap<String, Set<String>>();
384        for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
385            final String word1 = resultWords.get(entry.getKey());
386            final int unigramFreq = resultFrequencies.get(entry.getKey());
387            for (PendingAttribute attr : entry.getValue()) {
388                final String word2 = resultWords.get(attr.mAddress);
389                if (actBigrams.get(word1) == null) {
390                    actBigrams.put(word1, new HashSet<String>());
391                }
392                actBigrams.get(word1).add(word2);
393
394                if (checkProbability) {
395                    final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
396                            unigramFreq, attr.mFrequency);
397                    assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
398                }
399            }
400        }
401        assertEquals(actBigrams, expBigrams);
402    }
403
404    private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words,
405            final SparseArray<List<Integer>> bigrams, final int bufferType,
406            final boolean checkProbability) {
407        final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap();
408        final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams =
409                CollectionUtils.newTreeMap();
410        final TreeMap<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
411
412        long now = -1, diff = -1;
413        try {
414            final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, bufferType);
415            now = System.currentTimeMillis();
416            dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
417            diff = System.currentTimeMillis() - now;
418        } catch (IOException e) {
419            Log.e(TAG, "IOException", e);
420        } catch (UnsupportedFormatException e) {
421            Log.e(TAG, "UnsupportedFormatException", e);
422        }
423
424        checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability);
425        return diff;
426    }
427
428    private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
429            final SparseArray<List<Integer>> bigrams, final int bufferType,
430            final FormatSpec.FormatOptions formatOptions, final String message) {
431        final String dictName = "runReadUnigrams";
432        final String dictVersion = Long.toString(System.currentTimeMillis());
433        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
434                getContext().getCacheDir());
435
436        // making the dictionary from lists of words.
437        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
438                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
439        addUnigrams(words.size(), dict, words, null /* shortcutMap */);
440        addBigrams(dict, words, bigrams);
441
442        timeWritingDictToFile(file, dict, formatOptions);
443
444        // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the
445        // probability when there's a timestamp for the entry.
446        // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native.
447        long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType,
448                !formatOptions.mHasTimestamp /* checkProbability */);
449        long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */,
450                bufferType);
451
452        return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
453                + " : " + message + " : " + outputOptions(bufferType, formatOptions);
454    }
455
456    private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
457            final int bufferType, final FormatSpec.FormatOptions formatOptions) {
458        results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
459                formatOptions, "unigram"));
460        results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
461                formatOptions, "chain"));
462        results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
463                formatOptions, "star"));
464    }
465
466    public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
467        final ArrayList<String> results = CollectionUtils.newArrayList();
468
469        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
470                BinaryDictUtils.VERSION2_OPTIONS);
471
472        for (final String result : results) {
473            Log.d(TAG, result);
474        }
475    }
476
477    public void testReadUnigramsAndBigramsBinaryWithByteArray() {
478        final ArrayList<String> results = CollectionUtils.newArrayList();
479
480        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
481                BinaryDictUtils.VERSION2_OPTIONS);
482
483        for (final String result : results) {
484            Log.d(TAG, result);
485        }
486    }
487
488    // Tests for getTerminalPosition
489    private String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
490        if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);
491
492        DictionaryHeader fileHeader = null;
493        try {
494            fileHeader = dictDecoder.readHeader();
495        } catch (IOException e) {
496            return null;
497        } catch (UnsupportedFormatException e) {
498            return null;
499        }
500        if (fileHeader == null) return null;
501        return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
502                address, fileHeader.mFormatOptions).mWord;
503    }
504
505    private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
506            final boolean contained) {
507        long diff = -1;
508        int position = -1;
509        try {
510            final long now = System.nanoTime();
511            position = dictDecoder.getTerminalPosition(word);
512            diff = System.nanoTime() - now;
513        } catch (IOException e) {
514            Log.e(TAG, "IOException while getTerminalPosition", e);
515        } catch (UnsupportedFormatException e) {
516            Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
517        }
518
519        assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
520        if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
521        return diff;
522    }
523
524    private void runGetTerminalPosition(final ArrayList<String> words,
525            final SparseArray<List<Integer>> bigrams, final int bufferType,
526            final FormatOptions formatOptions, final String message) {
527        final String dictName = "testGetTerminalPosition";
528        final String dictVersion = Long.toString(System.currentTimeMillis());
529        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
530                getContext().getCacheDir());
531
532        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
533                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
534        addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
535        addBigrams(dict, words, bigrams);
536        timeWritingDictToFile(file, dict, formatOptions);
537
538        final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file, DictDecoder.USE_BYTEARRAY);
539        try {
540            dictDecoder.openDictBuffer();
541        } catch (IOException e) {
542            Log.e(TAG, "IOException while opening the buffer", e);
543        } catch (UnsupportedFormatException e) {
544            Log.e(TAG, "IOException while opening the buffer", e);
545        }
546        assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
547
548        try {
549            // too long word
550            final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
551            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));
552
553            // null
554            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));
555
556            // empty string
557            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
558        } catch (IOException e) {
559        } catch (UnsupportedFormatException e) {
560        }
561
562        // Test a word that is contained within the dictionary.
563        long sum = 0;
564        for (int i = 0; i < sWords.size(); ++i) {
565            final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true);
566            sum += time == -1 ? 0 : time;
567        }
568        Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message
569                + " : " + outputOptions(bufferType, formatOptions));
570
571        // Test a word that isn't contained within the dictionary.
572        final Random random = new Random((int)System.currentTimeMillis());
573        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
574                random);
575        for (int i = 0; i < 1000; ++i) {
576            final String word = CodePointUtils.generateWord(random, codePointSet);
577            if (sWords.indexOf(word) != -1) continue;
578            checkGetTerminalPosition(dictDecoder, word, false);
579        }
580    }
581
582    private void runGetTerminalPositionTests(final int bufferType,
583            final FormatOptions formatOptions) {
584        runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
585    }
586
587    public void testGetTerminalPosition() {
588        final ArrayList<String> results = CollectionUtils.newArrayList();
589
590        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
591                BinaryDictUtils.VERSION2_OPTIONS);
592        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
593                BinaryDictUtils.VERSION2_OPTIONS);
594
595        for (final String result : results) {
596            Log.d(TAG, result);
597        }
598    }
599}
600