BinaryDictDecoderEncoderTests.java revision fa946d4a0f6329134ddbae9a3ce03f6d4009019b
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.makedict;
18
19import android.test.AndroidTestCase;
20import android.test.MoreAsserts;
21import android.test.suitebuilder.annotation.LargeTest;
22import android.util.Log;
23import android.util.SparseArray;
24
25import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
26import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
27import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
28import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
29import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
30import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
31import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
32import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
33import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
34import com.android.inputmethod.latin.utils.CollectionUtils;
35
36import java.io.File;
37import java.io.FileInputStream;
38import java.io.IOException;
39import java.util.ArrayList;
40import java.util.Arrays;
41import java.util.HashMap;
42import java.util.HashSet;
43import java.util.List;
44import java.util.Map.Entry;
45import java.util.Random;
46import java.util.Set;
47import java.util.TreeMap;
48
49/**
50 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
51 */
52@LargeTest
53public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
54    private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
55    private static final int DEFAULT_MAX_UNIGRAMS = 100;
56    private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
57    private static final int UNIGRAM_FREQ = 10;
58    private static final int BIGRAM_FREQ = 50;
59    private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
60    private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
61    private static final int NUM_OF_SHORTCUTS = 5;
62
63    private static final int USE_BYTE_ARRAY = 1;
64    private static final int USE_BYTE_BUFFER = 2;
65
66    private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
67    private static final SparseArray<List<Integer>> sEmptyBigrams =
68            CollectionUtils.newSparseArray();
69    private static final SparseArray<List<Integer>> sStarBigrams = CollectionUtils.newSparseArray();
70    private static final SparseArray<List<Integer>> sChainBigrams =
71            CollectionUtils.newSparseArray();
72    private static final HashMap<String, List<String>> sShortcuts = CollectionUtils.newHashMap();
73
74    private static final FormatSpec.FormatOptions VERSION2 = new FormatSpec.FormatOptions(2);
75    private static final FormatSpec.FormatOptions VERSION3_WITHOUT_DYNAMIC_UPDATE =
76            new FormatSpec.FormatOptions(3, false /* supportsDynamicUpdate */);
77    private static final FormatSpec.FormatOptions VERSION3_WITH_DYNAMIC_UPDATE =
78            new FormatSpec.FormatOptions(3, true /* supportsDynamicUpdate */);
79    private static final FormatSpec.FormatOptions VERSION4_WITHOUT_DYNAMIC_UPDATE =
80            new FormatSpec.FormatOptions(4, false /* supportsDynamicUpdate */);
81    private static final FormatSpec.FormatOptions VERSION4_WITH_DYNAMIC_UPDATE =
82            new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */);
83
84    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
85
86    public BinaryDictDecoderEncoderTests() {
87        this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
88    }
89
90    public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
91        super();
92        Log.e(TAG, "Testing dictionary: seed is " + seed);
93        final Random random = new Random(seed);
94        sWords.clear();
95        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
96                random);
97        generateWords(maxUnigrams, random, codePointSet);
98
99        for (int i = 0; i < sWords.size(); ++i) {
100            sChainBigrams.put(i, new ArrayList<Integer>());
101            if (i > 0) {
102                sChainBigrams.get(i - 1).add(i);
103            }
104        }
105
106        sStarBigrams.put(0, new ArrayList<Integer>());
107        // MAX - 1 because we added one above already
108        final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1);
109        for (int i = 1; i < maxBigrams; ++i) {
110            sStarBigrams.get(0).add(i);
111        }
112
113        sShortcuts.clear();
114        for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
115            final int from = Math.abs(random.nextInt()) % sWords.size();
116            sShortcuts.put(sWords.get(from), new ArrayList<String>());
117            for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
118                final int to = Math.abs(random.nextInt()) % sWords.size();
119                sShortcuts.get(sWords.get(from)).add(sWords.get(to));
120            }
121        }
122    }
123
124    private DictEncoder getDictEncoder(final File file, final FormatOptions formatOptions) {
125        if (formatOptions.mVersion == FormatSpec.VERSION4) {
126            return new Ver4DictEncoder(getContext().getCacheDir());
127        } else if (formatOptions.mVersion == 3 || formatOptions.mVersion == 2) {
128            return new Ver3DictEncoder(file);
129        } else {
130            throw new RuntimeException("The format option has a wrong version : "
131                    + formatOptions.mVersion);
132        }
133    }
134
135    private void generateWords(final int number, final Random random, final int[] codePointSet) {
136        final Set<String> wordSet = CollectionUtils.newHashSet();
137        while (wordSet.size() < number) {
138            wordSet.add(CodePointUtils.generateWord(random, codePointSet));
139        }
140        sWords.addAll(wordSet);
141    }
142
143    /**
144     * Adds unigrams to the dictionary.
145     */
146    private void addUnigrams(final int number, final FusionDictionary dict,
147            final List<String> words, final HashMap<String, List<String>> shortcutMap) {
148        for (int i = 0; i < number; ++i) {
149            final String word = words.get(i);
150            final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
151            if (shortcutMap != null && shortcutMap.containsKey(word)) {
152                for (final String shortcut : shortcutMap.get(word)) {
153                    shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ));
154                }
155            }
156            dict.add(word, UNIGRAM_FREQ, (shortcutMap == null) ? null : shortcuts,
157                    false /* isNotAWord */);
158        }
159    }
160
161    private void addBigrams(final FusionDictionary dict,
162            final List<String> words,
163            final SparseArray<List<Integer>> bigrams) {
164        for (int i = 0; i < bigrams.size(); ++i) {
165            final int w1 = bigrams.keyAt(i);
166            for (int w2 : bigrams.valueAt(i)) {
167                dict.setBigram(words.get(w1), words.get(w2), BIGRAM_FREQ);
168            }
169        }
170    }
171
172//    The following is useful to dump the dictionary into a textual file, but it can't compile
173//    on-device, so it's commented out.
174//    private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
175//            throws IOException {
176//        com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
177//                new java.io.FileWriter(new File(filename)), dict);
178//    }
179
180    private long timeWritingDictToFile(final File file, final FusionDictionary dict,
181            final FormatSpec.FormatOptions formatOptions) {
182
183        long now = -1, diff = -1;
184
185        try {
186            final DictEncoder dictEncoder = getDictEncoder(file, formatOptions);
187
188            now = System.currentTimeMillis();
189            // If you need to dump the dict to a textual file, uncomment the line below and the
190            // function above
191            // dumpToCombinedFileForDebug(file, "/tmp/foo");
192            dictEncoder.writeDictionary(dict, formatOptions);
193            diff = System.currentTimeMillis() - now;
194        } catch (IOException e) {
195            Log.e(TAG, "IO exception while writing file", e);
196        } catch (UnsupportedFormatException e) {
197            Log.e(TAG, "UnsupportedFormatException", e);
198        }
199
200        return diff;
201    }
202
203    private void checkDictionary(final FusionDictionary dict, final List<String> words,
204            final SparseArray<List<Integer>> bigrams,
205            final HashMap<String, List<String>> shortcutMap) {
206        assertNotNull(dict);
207
208        // check unigram
209        for (final String word : words) {
210            final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
211            assertNotNull(ptNode);
212        }
213
214        // check bigram
215        for (int i = 0; i < bigrams.size(); ++i) {
216            final int w1 = bigrams.keyAt(i);
217            for (final int w2 : bigrams.valueAt(i)) {
218                final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
219                        words.get(w1));
220                assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2)));
221            }
222        }
223
224        // check shortcut
225        if (shortcutMap != null) {
226            for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) {
227                assertTrue(words.contains(entry.getKey()));
228                final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
229                        entry.getKey());
230                for (final String word : entry.getValue()) {
231                    assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
232                            ptNode.getShortcut(word));
233                }
234            }
235        }
236    }
237
238    private String outputOptions(final int bufferType,
239            final FormatSpec.FormatOptions formatOptions) {
240        String result = " : buffer type = "
241                + ((bufferType == USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
242        result += " : version = " + formatOptions.mVersion;
243        return result + ", supportsDynamicUpdate = " + formatOptions.mSupportsDynamicUpdate;
244    }
245
246    private DictionaryOptions getDictionaryOptions(final String id, final String version) {
247        final DictionaryOptions options = new DictionaryOptions(new HashMap<String, String>(),
248                false, false);
249        options.mAttributes.put("version", version);
250        options.mAttributes.put("dictionary", id);
251        return options;
252    }
253
254    private File setUpDictionaryFile(final String name, final String version) {
255        File file = null;
256        try {
257            file = new File(getContext().getCacheDir(), name + "." + version
258                    + TEST_DICT_FILE_EXTENSION);
259            file.createNewFile();
260        } catch (IOException e) {
261            // do nothing
262        }
263        assertTrue("Failed to create the dictionary file.", file.exists());
264        return file;
265    }
266
267    private DictDecoder getDictDecoder(final File file, final int bufferType,
268            final FormatOptions formatOptions, final DictionaryOptions dictOptions) {
269        if (formatOptions.mVersion == FormatSpec.VERSION4) {
270            final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
271            return FormatSpec.getDictDecoder(new File(getContext().getCacheDir(),
272                    header.getId() + "." + header.getVersion()), bufferType);
273        } else {
274            return FormatSpec.getDictDecoder(file, bufferType);
275        }
276    }
277    // Tests for readDictionaryBinary and writeDictionaryBinary
278
279    private long timeReadingAndCheckDict(final File file, final List<String> words,
280            final SparseArray<List<Integer>> bigrams,
281            final HashMap<String, List<String>> shortcutMap, final int bufferType,
282            final FormatOptions formatOptions, final DictionaryOptions dictOptions) {
283        long now, diff = -1;
284
285        FusionDictionary dict = null;
286        try {
287            final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions,
288                    dictOptions);
289            now = System.currentTimeMillis();
290            dict = dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */);
291            diff  = System.currentTimeMillis() - now;
292        } catch (IOException e) {
293            Log.e(TAG, "IOException while reading dictionary", e);
294        } catch (UnsupportedFormatException e) {
295            Log.e(TAG, "Unsupported format", e);
296        }
297
298        checkDictionary(dict, words, bigrams, shortcutMap);
299        return diff;
300    }
301
302    // Tests for readDictionaryBinary and writeDictionaryBinary
303    private String runReadAndWrite(final List<String> words,
304            final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts,
305            final int bufferType, final FormatSpec.FormatOptions formatOptions,
306            final String message) {
307
308        final String dictName = "runReadAndWrite";
309        final String dictVersion = Long.toString(System.currentTimeMillis());
310        final File file = setUpDictionaryFile(dictName, dictVersion);
311
312        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
313                getDictionaryOptions(dictName, dictVersion));
314        addUnigrams(words.size(), dict, words, shortcuts);
315        addBigrams(dict, words, bigrams);
316        checkDictionary(dict, words, bigrams, shortcuts);
317
318        final long write = timeWritingDictToFile(file, dict, formatOptions);
319        final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType,
320                formatOptions, dict.mOptions);
321
322        return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
323                + " : " + outputOptions(bufferType, formatOptions);
324    }
325
326    private void runReadAndWriteTests(final List<String> results, final int bufferType,
327            final FormatSpec.FormatOptions formatOptions) {
328        results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType,
329                formatOptions, "unigram"));
330        results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType,
331                formatOptions, "chain"));
332        results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
333                formatOptions, "star"));
334        results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
335                "unigram with shortcuts"));
336        results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
337                "chain with shortcuts"));
338        results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
339                "star with shortcuts"));
340    }
341
342    // Unit test for CharEncoding.readString and CharEncoding.writeString.
343    public void testCharEncoding() {
344        // the max length of a word in sWords is less than 50.
345        // See generateWords.
346        final byte[] buffer = new byte[50 * 3];
347        final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
348        for (final String word : sWords) {
349            Log.d("testReadAndWriteString", "write : " + word);
350            Arrays.fill(buffer, (byte)0);
351            CharEncoding.writeString(buffer, 0, word);
352            dictBuffer.position(0);
353            final String str = CharEncoding.readString(dictBuffer);
354            assertEquals(word, str);
355        }
356    }
357
358    public void testReadAndWriteWithByteBuffer() {
359        final List<String> results = CollectionUtils.newArrayList();
360
361        runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION2);
362        runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE);
363        runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE);
364        runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE);
365        runReadAndWriteTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE);
366
367        for (final String result : results) {
368            Log.d(TAG, result);
369        }
370    }
371
372    public void testReadAndWriteWithByteArray() {
373        final List<String> results = CollectionUtils.newArrayList();
374
375        runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION2);
376        runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);
377        runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE);
378        runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE);
379        runReadAndWriteTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE);
380
381        for (final String result : results) {
382            Log.d(TAG, result);
383        }
384    }
385
386    // Tests for readUnigramsAndBigramsBinary
387
388    private void checkWordMap(final List<String> expectedWords,
389            final SparseArray<List<Integer>> expectedBigrams,
390            final TreeMap<Integer, String> resultWords,
391            final TreeMap<Integer, Integer> resultFrequencies,
392            final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams) {
393        // check unigrams
394        final Set<String> actualWordsSet = new HashSet<String>(resultWords.values());
395        final Set<String> expectedWordsSet = new HashSet<String>(expectedWords);
396        assertEquals(actualWordsSet, expectedWordsSet);
397
398        for (int freq : resultFrequencies.values()) {
399            assertEquals(freq, UNIGRAM_FREQ);
400        }
401
402        // check bigrams
403        final HashMap<String, List<String>> expBigrams = new HashMap<String, List<String>>();
404        for (int i = 0; i < expectedBigrams.size(); ++i) {
405            final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
406            for (int w2 : expectedBigrams.valueAt(i)) {
407                if (expBigrams.get(word1) == null) {
408                    expBigrams.put(word1, new ArrayList<String>());
409                }
410                expBigrams.get(word1).add(expectedWords.get(w2));
411            }
412        }
413
414        final HashMap<String, List<String>> actBigrams = new HashMap<String, List<String>>();
415        for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
416            final String word1 = resultWords.get(entry.getKey());
417            final int unigramFreq = resultFrequencies.get(entry.getKey());
418            for (PendingAttribute attr : entry.getValue()) {
419                final String word2 = resultWords.get(attr.mAddress);
420                if (actBigrams.get(word1) == null) {
421                    actBigrams.put(word1, new ArrayList<String>());
422                }
423                actBigrams.get(word1).add(word2);
424
425                final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
426                        unigramFreq, attr.mFrequency);
427                assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
428            }
429        }
430
431        assertEquals(actBigrams, expBigrams);
432    }
433
434    private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words,
435            final SparseArray<List<Integer>> bigrams, final int bufferType,
436            final FormatOptions formatOptions, final DictionaryOptions dictOptions) {
437        FileInputStream inStream = null;
438
439        final TreeMap<Integer, String> resultWords = CollectionUtils.newTreeMap();
440        final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams =
441                CollectionUtils.newTreeMap();
442        final TreeMap<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
443
444        long now = -1, diff = -1;
445        try {
446            final DictDecoder dictDecoder = getDictDecoder(file, bufferType, formatOptions,
447                    dictOptions);
448            now = System.currentTimeMillis();
449            dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
450            diff = System.currentTimeMillis() - now;
451        } catch (IOException e) {
452            Log.e(TAG, "IOException", e);
453        } catch (UnsupportedFormatException e) {
454            Log.e(TAG, "UnsupportedFormatException", e);
455        } finally {
456            if (inStream != null) {
457                try {
458                    inStream.close();
459                } catch (IOException e) {
460                    // do nothing
461                }
462            }
463        }
464
465        checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams);
466        return diff;
467    }
468
469    private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
470            final SparseArray<List<Integer>> bigrams, final int bufferType,
471            final FormatSpec.FormatOptions formatOptions, final String message) {
472        final String dictName = "runReadUnigrams";
473        final String dictVersion = Long.toString(System.currentTimeMillis());
474        final File file = setUpDictionaryFile(dictName, dictVersion);
475
476        // making the dictionary from lists of words.
477        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
478                getDictionaryOptions(dictName, dictVersion));
479        addUnigrams(words.size(), dict, words, null /* shortcutMap */);
480        addBigrams(dict, words, bigrams);
481
482        timeWritingDictToFile(file, dict, formatOptions);
483
484        long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType,
485                formatOptions, dict.mOptions);
486        long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */,
487                bufferType, formatOptions, dict.mOptions);
488
489        return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
490                + " : " + message + " : " + outputOptions(bufferType, formatOptions);
491    }
492
493    private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
494            final int bufferType, final FormatSpec.FormatOptions formatOptions) {
495        results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
496                formatOptions, "unigram"));
497        results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
498                formatOptions, "chain"));
499        results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
500                formatOptions, "star"));
501    }
502
503    public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
504        final ArrayList<String> results = CollectionUtils.newArrayList();
505
506        runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION2);
507        runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE);
508        runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE);
509        runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE);
510        runReadUnigramsAndBigramsTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE);
511
512        for (final String result : results) {
513            Log.d(TAG, result);
514        }
515    }
516
517    public void testReadUnigramsAndBigramsBinaryWithByteArray() {
518        final ArrayList<String> results = CollectionUtils.newArrayList();
519
520        runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION2);
521        runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);
522        runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE);
523        runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE);
524        runReadUnigramsAndBigramsTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE);
525
526        for (final String result : results) {
527            Log.d(TAG, result);
528        }
529    }
530
531    // Tests for getTerminalPosition
532    private String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
533        if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);
534
535        FileHeader fileHeader = null;
536        try {
537            fileHeader = dictDecoder.readHeader();
538        } catch (IOException e) {
539            return null;
540        } catch (UnsupportedFormatException e) {
541            return null;
542        }
543        if (fileHeader == null) return null;
544        return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
545                address, fileHeader.mFormatOptions).mWord;
546    }
547
548    private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
549            int index, boolean contained) {
550        final int expectedFrequency = (UNIGRAM_FREQ + index) % 255;
551        long diff = -1;
552        int position = -1;
553        try {
554            final long now = System.nanoTime();
555            position = dictDecoder.getTerminalPosition(word);
556            diff = System.nanoTime() - now;
557        } catch (IOException e) {
558            Log.e(TAG, "IOException while getTerminalPosition", e);
559        } catch (UnsupportedFormatException e) {
560            Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
561        }
562
563        assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
564        if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
565        return diff;
566    }
567
568    private void runGetTerminalPosition(final ArrayList<String> words,
569            final SparseArray<List<Integer>> bigrams, final int bufferType,
570            final FormatOptions formatOptions, final String message) {
571        final String dictName = "testGetTerminalPosition";
572        final String dictVersion = Long.toString(System.currentTimeMillis());
573        final File file = setUpDictionaryFile(dictName, dictVersion);
574
575        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
576                getDictionaryOptions(dictName, dictVersion));
577        addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
578        addBigrams(dict, words, bigrams);
579        timeWritingDictToFile(file, dict, formatOptions);
580
581        final DictDecoder dictDecoder = getDictDecoder(file, DictDecoder.USE_BYTEARRAY,
582                formatOptions, dict.mOptions);
583        try {
584            dictDecoder.openDictBuffer();
585        } catch (IOException e) {
586            // ignore
587            Log.e(TAG, "IOException while opening the buffer", e);
588        }
589        assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
590
591        try {
592            // too long word
593            final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
594            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));
595
596            // null
597            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));
598
599            // empty string
600            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
601        } catch (IOException e) {
602        } catch (UnsupportedFormatException e) {
603        }
604
605        // Test a word that is contained within the dictionary.
606        long sum = 0;
607        for (int i = 0; i < sWords.size(); ++i) {
608            final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), i, true);
609            sum += time == -1 ? 0 : time;
610        }
611        Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message
612                + " : " + outputOptions(bufferType, formatOptions));
613
614        // Test a word that isn't contained within the dictionary.
615        final Random random = new Random((int)System.currentTimeMillis());
616        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
617                random);
618        for (int i = 0; i < 1000; ++i) {
619            final String word = CodePointUtils.generateWord(random, codePointSet);
620            if (sWords.indexOf(word) != -1) continue;
621            checkGetTerminalPosition(dictDecoder, word, i, false);
622        }
623    }
624
625    private void runGetTerminalPositionTests(final ArrayList<String> results, final int bufferType,
626            final FormatOptions formatOptions) {
627        runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
628    }
629
630    public void testGetTerminalPosition() {
631        final ArrayList<String> results = CollectionUtils.newArrayList();
632
633        runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION2);
634        runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITHOUT_DYNAMIC_UPDATE);
635        runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION3_WITH_DYNAMIC_UPDATE);
636        runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITHOUT_DYNAMIC_UPDATE);
637        runGetTerminalPositionTests(results, USE_BYTE_ARRAY, VERSION4_WITH_DYNAMIC_UPDATE);
638
639        runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION2);
640        runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITHOUT_DYNAMIC_UPDATE);
641        runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION3_WITH_DYNAMIC_UPDATE);
642        runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITHOUT_DYNAMIC_UPDATE);
643        runGetTerminalPositionTests(results, USE_BYTE_BUFFER, VERSION4_WITH_DYNAMIC_UPDATE);
644
645        for (final String result : results) {
646            Log.d(TAG, result);
647        }
648    }
649
650    public void testDeleteWord() {
651        final String dictName = "testDeleteWord";
652        final String dictVersion = Long.toString(System.currentTimeMillis());
653        final File file = setUpDictionaryFile(dictName, dictVersion);
654
655        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
656                new FusionDictionary.DictionaryOptions(
657                        new HashMap<String, String>(), false, false));
658        addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
659        timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
660
661        final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file, DictDecoder.USE_BYTEARRAY);
662        try {
663            dictDecoder.openDictBuffer();
664        } catch (IOException e) {
665            // ignore
666            Log.e(TAG, "IOException while opening the buffer", e);
667        }
668        assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
669
670        try {
671            MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
672                    dictDecoder.getTerminalPosition(sWords.get(0)));
673            DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(0));
674            assertEquals(FormatSpec.NOT_VALID_WORD,
675                    dictDecoder.getTerminalPosition(sWords.get(0)));
676
677            MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
678                    dictDecoder.getTerminalPosition(sWords.get(5)));
679            DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(5));
680            assertEquals(FormatSpec.NOT_VALID_WORD,
681                    dictDecoder.getTerminalPosition(sWords.get(5)));
682        } catch (IOException e) {
683        } catch (UnsupportedFormatException e) {
684        }
685    }
686}
687