BinaryDictionaryTests.java revision 88fa47a27d45f6460971d0d223aa558e121b3478
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.test.AndroidTestCase;
20import android.test.suitebuilder.annotation.LargeTest;
21import android.text.TextUtils;
22import android.util.Pair;
23
24import com.android.inputmethod.latin.makedict.CodePointUtils;
25import com.android.inputmethod.latin.makedict.FormatSpec;
26import com.android.inputmethod.latin.makedict.WeightedString;
27import com.android.inputmethod.latin.makedict.WordProperty;
28import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
29import com.android.inputmethod.latin.utils.FileUtils;
30import com.android.inputmethod.latin.utils.LanguageModelParam;
31
32import java.io.File;
33import java.io.IOException;
34import java.util.ArrayList;
35import java.util.HashMap;
36import java.util.HashSet;
37import java.util.Locale;
38import java.util.Map;
39import java.util.Random;
40
41// TODO Use the seed passed as an argument for makedict test.
42@LargeTest
43public class BinaryDictionaryTests extends AndroidTestCase {
44    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
45    private static final String TEST_LOCALE = "test";
46    private static final int[] DICT_FORMAT_VERSIONS =
47            new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
48
49    private static boolean canCheckBigramProbability(final int formatVersion) {
50        return formatVersion > FormatSpec.VERSION401;
51    }
52
53    private static boolean supportsBeginningOfSentence(final int formatVersion) {
54        return formatVersion > FormatSpec.VERSION401;
55    }
56
57    private File createEmptyDictionaryAndGetFile(final String dictId,
58            final int formatVersion) throws IOException {
59        if (formatVersion == FormatSpec.VERSION4
60                || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
61                || formatVersion == FormatSpec.VERSION4_DEV) {
62            return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
63        } else {
64            throw new IOException("Dictionary format version " + formatVersion
65                    + " is not supported.");
66        }
67    }
68
69    private File createEmptyVer4DictionaryAndGetFile(final String dictId,
70            final int formatVersion) throws IOException {
71        final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
72                getContext().getCacheDir());
73        file.delete();
74        file.mkdir();
75        Map<String, String> attributeMap = new HashMap<>();
76        if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
77                Locale.ENGLISH, attributeMap)) {
78            return file;
79        } else {
80            throw new IOException("Empty dictionary " + file.getAbsolutePath()
81                    + " cannot be created. Format version: " + formatVersion);
82        }
83    }
84
85    public void testIsValidDictionary() {
86        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
87            testIsValidDictionary(formatVersion);
88        }
89    }
90
91    private void testIsValidDictionary(final int formatVersion) {
92        File dictFile = null;
93        try {
94            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
95        } catch (IOException e) {
96            fail("IOException while writing an initial dictionary : " + e);
97        }
98        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
99                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
100                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
101        assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
102                binaryDictionary.isValidDictionary());
103        binaryDictionary.close();
104        assertFalse("binaryDictionary must be invalid after closing.",
105                binaryDictionary.isValidDictionary());
106        FileUtils.deleteRecursively(dictFile);
107        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
108                dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
109                TEST_LOCALE, true /* isUpdatable */);
110        assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
111                binaryDictionary.isValidDictionary());
112        binaryDictionary.close();
113    }
114
115    public void testConstructingDictionaryOnMemory() {
116        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
117            testConstructingDictionaryOnMemory(formatVersion);
118        }
119    }
120
121    private void testConstructingDictionaryOnMemory(final int formatVersion) {
122        File dictFile = null;
123        try {
124            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
125        } catch (IOException e) {
126            fail("IOException while writing an initial dictionary : " + e);
127        }
128        FileUtils.deleteRecursively(dictFile);
129        assertFalse(dictFile.exists());
130        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
131                true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
132                new HashMap<String, String>());
133        assertTrue(binaryDictionary.isValidDictionary());
134        assertEquals(formatVersion, binaryDictionary.getFormatVersion());
135        final int probability = 100;
136        addUnigramWord(binaryDictionary, "word", probability);
137        assertEquals(probability, binaryDictionary.getFrequency("word"));
138        assertFalse(dictFile.exists());
139        binaryDictionary.flush();
140        assertTrue(dictFile.exists());
141        assertTrue(binaryDictionary.isValidDictionary());
142        assertEquals(formatVersion, binaryDictionary.getFormatVersion());
143        assertEquals(probability, binaryDictionary.getFrequency("word"));
144        binaryDictionary.close();
145        dictFile.delete();
146    }
147
148    public void testAddTooLongWord() {
149        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
150            testAddTooLongWord(formatVersion);
151        }
152    }
153
154    private void testAddTooLongWord(final int formatVersion) {
155        File dictFile = null;
156        try {
157            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
158        } catch (IOException e) {
159            fail("IOException while writing an initial dictionary : " + e);
160        }
161        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
162                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
163                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
164
165        final StringBuffer stringBuilder = new StringBuffer();
166        for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
167            stringBuilder.append('a');
168        }
169        final String validLongWord = stringBuilder.toString();
170        stringBuilder.append('a');
171        final String invalidLongWord = stringBuilder.toString();
172        final int probability = 100;
173        addUnigramWord(binaryDictionary, "aaa", probability);
174        addUnigramWord(binaryDictionary, validLongWord, probability);
175        addUnigramWord(binaryDictionary, invalidLongWord, probability);
176        // Too long short cut.
177        binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
178                10 /* shortcutProbability */, false /* isBeginningOfSentence */,
179                false /* isNotAWord */, false /* isBlacklisted */,
180                BinaryDictionary.NOT_A_VALID_TIMESTAMP);
181        addUnigramWord(binaryDictionary, "abc", probability);
182        final int updatedProbability = 200;
183        // Update.
184        addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
185        addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
186        addUnigramWord(binaryDictionary, "abc", updatedProbability);
187
188        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
189        assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
190        assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
191                binaryDictionary.getFrequency(invalidLongWord));
192        assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
193        dictFile.delete();
194    }
195
196    private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
197            final int probability) {
198        binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
199                BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
200                false /* isBeginningOfSentence */, false /* isNotAWord */,
201                false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
202    }
203
204    private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
205            final String word1, final int probability) {
206        binaryDictionary.addNgramEntry(new PrevWordsInfo(word0), word1, probability,
207                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
208    }
209
210    private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
211            final String word0, final String word1) {
212        return binaryDictionary.isValidNgram(new PrevWordsInfo(word0), word1);
213    }
214
215    private static void removeBigramEntry(final BinaryDictionary binaryDictionary,
216            final String word0, final String word1) {
217        binaryDictionary.removeNgramEntry(new PrevWordsInfo(word0), word1);
218    }
219
220    private static int getBigramProbability(final BinaryDictionary binaryDictionary,
221            final String word0,  final String word1) {
222        return binaryDictionary.getNgramProbability(new PrevWordsInfo(word0), word1);
223    }
224
225    public void testAddUnigramWord() {
226        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
227            testAddUnigramWord(formatVersion);
228        }
229    }
230
231    private void testAddUnigramWord(final int formatVersion) {
232        File dictFile = null;
233        try {
234            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
235        } catch (IOException e) {
236            fail("IOException while writing an initial dictionary : " + e);
237        }
238        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
239                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
240                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
241
242        final int probability = 100;
243        addUnigramWord(binaryDictionary, "aaa", probability);
244        // Reallocate and create.
245        addUnigramWord(binaryDictionary, "aab", probability);
246        // Insert into children.
247        addUnigramWord(binaryDictionary, "aac", probability);
248        // Make terminal.
249        addUnigramWord(binaryDictionary, "aa", probability);
250        // Create children.
251        addUnigramWord(binaryDictionary, "aaaa", probability);
252        // Reallocate and make termianl.
253        addUnigramWord(binaryDictionary, "a", probability);
254
255        final int updatedProbability = 200;
256        // Update.
257        addUnigramWord(binaryDictionary, "aaa", updatedProbability);
258
259        assertEquals(probability, binaryDictionary.getFrequency("aab"));
260        assertEquals(probability, binaryDictionary.getFrequency("aac"));
261        assertEquals(probability, binaryDictionary.getFrequency("aa"));
262        assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
263        assertEquals(probability, binaryDictionary.getFrequency("a"));
264        assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
265
266        dictFile.delete();
267    }
268
269    public void testRandomlyAddUnigramWord() {
270        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
271            testRandomlyAddUnigramWord(formatVersion);
272        }
273    }
274
275    private void testRandomlyAddUnigramWord(final int formatVersion) {
276        final int wordCount = 1000;
277        final int codePointSetSize = 50;
278        final long seed = System.currentTimeMillis();
279
280        File dictFile = null;
281        try {
282            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
283        } catch (IOException e) {
284            fail("IOException while writing an initial dictionary : " + e);
285        }
286        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
287                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
288                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
289
290        final HashMap<String, Integer> probabilityMap = new HashMap<>();
291        // Test a word that isn't contained within the dictionary.
292        final Random random = new Random(seed);
293        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
294        for (int i = 0; i < wordCount; ++i) {
295            final String word = CodePointUtils.generateWord(random, codePointSet);
296            probabilityMap.put(word, random.nextInt(0xFF));
297        }
298        for (String word : probabilityMap.keySet()) {
299            addUnigramWord(binaryDictionary, word, probabilityMap.get(word));
300        }
301        for (String word : probabilityMap.keySet()) {
302            assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
303        }
304        dictFile.delete();
305    }
306
307    public void testAddBigramWords() {
308        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
309            testAddBigramWords(formatVersion);
310        }
311    }
312
313    private void testAddBigramWords(final int formatVersion) {
314        File dictFile = null;
315        try {
316            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
317        } catch (IOException e) {
318            fail("IOException while writing an initial dictionary : " + e);
319        }
320        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
321                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
322                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
323
324        final int unigramProbability = 100;
325        final int bigramProbability = 150;
326        final int updatedBigramProbability = 200;
327        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
328        addUnigramWord(binaryDictionary, "abb", unigramProbability);
329        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
330        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
331        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
332        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
333        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
334
335        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
336        assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
337        assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
338        assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
339        if (canCheckBigramProbability(formatVersion)) {
340            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
341            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
342            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
343            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
344        }
345
346        addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
347        if (canCheckBigramProbability(formatVersion)) {
348            assertEquals(updatedBigramProbability,
349                    getBigramProbability(binaryDictionary, "aaa", "abb"));
350        }
351
352        assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
353        assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
354        assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
355        assertEquals(Dictionary.NOT_A_PROBABILITY,
356                getBigramProbability(binaryDictionary, "bcc", "aaa"));
357        assertEquals(Dictionary.NOT_A_PROBABILITY,
358                getBigramProbability(binaryDictionary, "bcc", "bbc"));
359        assertEquals(Dictionary.NOT_A_PROBABILITY,
360                getBigramProbability(binaryDictionary, "aaa", "aaa"));
361
362        // Testing bigram link.
363        addUnigramWord(binaryDictionary, "abcde", unigramProbability);
364        addUnigramWord(binaryDictionary, "fghij", unigramProbability);
365        addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability);
366        addUnigramWord(binaryDictionary, "fgh", unigramProbability);
367        addUnigramWord(binaryDictionary, "abc", unigramProbability);
368        addUnigramWord(binaryDictionary, "f", unigramProbability);
369
370        if (canCheckBigramProbability(formatVersion)) {
371            assertEquals(bigramProbability,
372                    getBigramProbability(binaryDictionary, "abcde", "fghij"));
373        }
374        assertEquals(Dictionary.NOT_A_PROBABILITY,
375                getBigramProbability(binaryDictionary, "abcde", "fgh"));
376        addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
377        if (canCheckBigramProbability(formatVersion)) {
378            assertEquals(updatedBigramProbability,
379                    getBigramProbability(binaryDictionary, "abcde", "fghij"));
380        }
381
382        dictFile.delete();
383    }
384
385    public void testRandomlyAddBigramWords() {
386        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
387            testRandomlyAddBigramWords(formatVersion);
388        }
389    }
390
391    private void testRandomlyAddBigramWords(final int formatVersion) {
392        final int wordCount = 100;
393        final int bigramCount = 1000;
394        final int codePointSetSize = 50;
395        final long seed = System.currentTimeMillis();
396        final Random random = new Random(seed);
397
398        File dictFile = null;
399        try {
400            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
401        } catch (IOException e) {
402            fail("IOException while writing an initial dictionary : " + e);
403        }
404        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
405                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
406                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
407
408        final ArrayList<String> words = new ArrayList<>();
409        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
410        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
411        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
412        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
413
414        for (int i = 0; i < wordCount; ++i) {
415            final String word = CodePointUtils.generateWord(random, codePointSet);
416            words.add(word);
417            final int unigramProbability = random.nextInt(0xFF);
418            unigramProbabilities.put(word, unigramProbability);
419            addUnigramWord(binaryDictionary, word, unigramProbability);
420        }
421
422        for (int i = 0; i < bigramCount; i++) {
423            final String word0 = words.get(random.nextInt(wordCount));
424            final String word1 = words.get(random.nextInt(wordCount));
425            if (TextUtils.equals(word0, word1)) {
426                continue;
427            }
428            final Pair<String, String> bigram = new Pair<>(word0, word1);
429            bigramWords.add(bigram);
430            final int unigramProbability = unigramProbabilities.get(word1);
431            final int bigramProbability =
432                    unigramProbability + random.nextInt(0xFF - unigramProbability);
433            bigramProbabilities.put(bigram, bigramProbability);
434            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
435        }
436
437        for (final Pair<String, String> bigram : bigramWords) {
438            final int bigramProbability = bigramProbabilities.get(bigram);
439            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
440                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
441            if (canCheckBigramProbability(formatVersion)) {
442                assertEquals(bigramProbability,
443                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
444            }
445        }
446
447        dictFile.delete();
448    }
449
450    public void testRemoveBigramWords() {
451        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
452            testRemoveBigramWords(formatVersion);
453        }
454    }
455
456    private void testRemoveBigramWords(final int formatVersion) {
457        File dictFile = null;
458        try {
459            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
460        } catch (IOException e) {
461            fail("IOException while writing an initial dictionary : " + e);
462        }
463        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
464                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
465                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
466        final int unigramProbability = 100;
467        final int bigramProbability = 150;
468        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
469        addUnigramWord(binaryDictionary, "abb", unigramProbability);
470        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
471        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
472        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
473        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
474        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
475
476        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
477        assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
478        assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
479        assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
480
481        removeBigramEntry(binaryDictionary, "aaa", "abb");
482        assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));
483        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
484        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
485
486
487        removeBigramEntry(binaryDictionary, "aaa", "bcc");
488        assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc"));
489        removeBigramEntry(binaryDictionary, "abb", "aaa");
490        assertFalse(isValidBigram(binaryDictionary, "abb", "aaa"));
491        removeBigramEntry(binaryDictionary, "abb", "bcc");
492        assertFalse(isValidBigram(binaryDictionary, "abb", "bcc"));
493
494        removeBigramEntry(binaryDictionary, "aaa", "abb");
495        // Test remove non-existing bigram operation.
496        removeBigramEntry(binaryDictionary, "aaa", "abb");
497        removeBigramEntry(binaryDictionary, "bcc", "aaa");
498
499        dictFile.delete();
500    }
501
502    public void testFlushDictionary() {
503        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
504            testFlushDictionary(formatVersion);
505        }
506    }
507
508    private void testFlushDictionary(final int formatVersion) {
509        File dictFile = null;
510        try {
511            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
512        } catch (IOException e) {
513            fail("IOException while writing an initial dictionary : " + e);
514        }
515        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
516                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
517                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
518
519        final int probability = 100;
520        addUnigramWord(binaryDictionary, "aaa", probability);
521        addUnigramWord(binaryDictionary, "abcd", probability);
522        // Close without flushing.
523        binaryDictionary.close();
524
525        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
526                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
527                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
528
529        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
530        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
531
532        addUnigramWord(binaryDictionary, "aaa", probability);
533        addUnigramWord(binaryDictionary, "abcd", probability);
534        binaryDictionary.flush();
535        binaryDictionary.close();
536
537        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
538                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
539                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
540
541        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
542        assertEquals(probability, binaryDictionary.getFrequency("abcd"));
543        addUnigramWord(binaryDictionary, "bcde", probability);
544        binaryDictionary.flush();
545        binaryDictionary.close();
546
547        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
548                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
549                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
550        assertEquals(probability, binaryDictionary.getFrequency("bcde"));
551        binaryDictionary.close();
552
553        dictFile.delete();
554    }
555
556    public void testFlushWithGCDictionary() {
557        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
558            testFlushWithGCDictionary(formatVersion);
559        }
560    }
561
562    private void testFlushWithGCDictionary(final int formatVersion) {
563        File dictFile = null;
564        try {
565            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
566        } catch (IOException e) {
567            fail("IOException while writing an initial dictionary : " + e);
568        }
569        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
570                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
571                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
572
573        final int unigramProbability = 100;
574        final int bigramProbability = 150;
575        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
576        addUnigramWord(binaryDictionary, "abb", unigramProbability);
577        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
578        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
579        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
580        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
581        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
582        binaryDictionary.flushWithGC();
583        binaryDictionary.close();
584
585        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
586                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
587                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
588        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
589        assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
590        assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
591        if (canCheckBigramProbability(formatVersion)) {
592            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
593            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
594            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
595            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
596        }
597        assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
598        assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
599        assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
600        binaryDictionary.flushWithGC();
601        binaryDictionary.close();
602
603        dictFile.delete();
604    }
605
606    public void testAddBigramWordsAndFlashWithGC() {
607        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
608            testAddBigramWordsAndFlashWithGC(formatVersion);
609        }
610    }
611
612    // TODO: Evaluate performance of GC
613    private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
614        final int wordCount = 100;
615        final int bigramCount = 1000;
616        final int codePointSetSize = 30;
617        final long seed = System.currentTimeMillis();
618        final Random random = new Random(seed);
619
620        File dictFile = null;
621        try {
622            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
623        } catch (IOException e) {
624            fail("IOException while writing an initial dictionary : " + e);
625        }
626
627        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
628                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
629                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
630
631        final ArrayList<String> words = new ArrayList<>();
632        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
633        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
634        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
635        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
636
637        for (int i = 0; i < wordCount; ++i) {
638            final String word = CodePointUtils.generateWord(random, codePointSet);
639            words.add(word);
640            final int unigramProbability = random.nextInt(0xFF);
641            unigramProbabilities.put(word, unigramProbability);
642            addUnigramWord(binaryDictionary, word, unigramProbability);
643        }
644
645        for (int i = 0; i < bigramCount; i++) {
646            final String word0 = words.get(random.nextInt(wordCount));
647            final String word1 = words.get(random.nextInt(wordCount));
648            if (TextUtils.equals(word0, word1)) {
649                continue;
650            }
651            final Pair<String, String> bigram = new Pair<>(word0, word1);
652            bigramWords.add(bigram);
653            final int unigramProbability = unigramProbabilities.get(word1);
654            final int bigramProbability =
655                    unigramProbability + random.nextInt(0xFF - unigramProbability);
656            bigramProbabilities.put(bigram, bigramProbability);
657            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
658        }
659
660        binaryDictionary.flushWithGC();
661        binaryDictionary.close();
662        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
663                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
664                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
665
666
667        for (final Pair<String, String> bigram : bigramWords) {
668            final int bigramProbability = bigramProbabilities.get(bigram);
669            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
670                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
671            if (canCheckBigramProbability(formatVersion)) {
672                assertEquals(bigramProbability,
673                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
674            }
675        }
676
677        dictFile.delete();
678    }
679
680    public void testRandomOperationsAndFlashWithGC() {
681        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
682            testRandomOperationsAndFlashWithGC(formatVersion);
683        }
684    }
685
686    private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
687        final int flashWithGCIterationCount = 50;
688        final int operationCountInEachIteration = 200;
689        final int initialUnigramCount = 100;
690        final float addUnigramProb = 0.5f;
691        final float addBigramProb = 0.8f;
692        final float removeBigramProb = 0.2f;
693        final int codePointSetSize = 30;
694
695        final long seed = System.currentTimeMillis();
696        final Random random = new Random(seed);
697
698        File dictFile = null;
699        try {
700            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
701        } catch (IOException e) {
702            fail("IOException while writing an initial dictionary : " + e);
703        }
704
705        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
706                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
707                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
708        final ArrayList<String> words = new ArrayList<>();
709        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
710        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
711        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
712        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
713        for (int i = 0; i < initialUnigramCount; ++i) {
714            final String word = CodePointUtils.generateWord(random, codePointSet);
715            words.add(word);
716            final int unigramProbability = random.nextInt(0xFF);
717            unigramProbabilities.put(word, unigramProbability);
718            addUnigramWord(binaryDictionary, word, unigramProbability);
719        }
720        binaryDictionary.flushWithGC();
721        binaryDictionary.close();
722
723        for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
724            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
725                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
726                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
727            for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
728                // Add unigram.
729                if (random.nextFloat() < addUnigramProb) {
730                    final String word = CodePointUtils.generateWord(random, codePointSet);
731                    words.add(word);
732                    final int unigramProbability = random.nextInt(0xFF);
733                    unigramProbabilities.put(word, unigramProbability);
734                    addUnigramWord(binaryDictionary, word, unigramProbability);
735                }
736                // Add bigram.
737                if (random.nextFloat() < addBigramProb && words.size() > 2) {
738                    final int word0Index = random.nextInt(words.size());
739                    int word1Index = random.nextInt(words.size() - 1);
740                    if (word0Index <= word1Index) {
741                        word1Index++;
742                    }
743                    final String word0 = words.get(word0Index);
744                    final String word1 = words.get(word1Index);
745                    if (TextUtils.equals(word0, word1)) {
746                        continue;
747                    }
748                    final int unigramProbability = unigramProbabilities.get(word1);
749                    final int bigramProbability =
750                            unigramProbability + random.nextInt(0xFF - unigramProbability);
751                    final Pair<String, String> bigram = new Pair<>(word0, word1);
752                    bigramWords.add(bigram);
753                    bigramProbabilities.put(bigram, bigramProbability);
754                    addBigramWords(binaryDictionary, word0, word1, bigramProbability);
755                }
756                // Remove bigram.
757                if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
758                    final int bigramIndex = random.nextInt(bigramWords.size());
759                    final Pair<String, String> bigram = bigramWords.get(bigramIndex);
760                    bigramWords.remove(bigramIndex);
761                    bigramProbabilities.remove(bigram);
762                    removeBigramEntry(binaryDictionary, bigram.first, bigram.second);
763                }
764            }
765
766            // Test whether the all unigram operations are collectlly handled.
767            for (int i = 0; i < words.size(); i++) {
768                final String word = words.get(i);
769                final int unigramProbability = unigramProbabilities.get(word);
770                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
771            }
772            // Test whether the all bigram operations are collectlly handled.
773            for (int i = 0; i < bigramWords.size(); i++) {
774                final Pair<String, String> bigram = bigramWords.get(i);
775                final int probability;
776                if (bigramProbabilities.containsKey(bigram)) {
777                    final int bigramProbability = bigramProbabilities.get(bigram);
778                    probability = bigramProbability;
779                } else {
780                    probability = Dictionary.NOT_A_PROBABILITY;
781                }
782
783                if (canCheckBigramProbability(formatVersion)) {
784                    assertEquals(probability,
785                            getBigramProbability(binaryDictionary, bigram.first, bigram.second));
786                }
787                assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
788                        isValidBigram(binaryDictionary, bigram.first, bigram.second));
789            }
790            binaryDictionary.flushWithGC();
791            binaryDictionary.close();
792        }
793
794        dictFile.delete();
795    }
796
797    public void testAddManyUnigramsAndFlushWithGC() {
798        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
799            testAddManyUnigramsAndFlushWithGC(formatVersion);
800        }
801    }
802
803    private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
804        final int flashWithGCIterationCount = 3;
805        final int codePointSetSize = 50;
806
807        final long seed = System.currentTimeMillis();
808        final Random random = new Random(seed);
809
810        File dictFile = null;
811        try {
812            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
813        } catch (IOException e) {
814            fail("IOException while writing an initial dictionary : " + e);
815        }
816
817        final ArrayList<String> words = new ArrayList<>();
818        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
819        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
820
821        BinaryDictionary binaryDictionary;
822        for (int i = 0; i < flashWithGCIterationCount; i++) {
823            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
824                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
825                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
826            while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
827                final String word = CodePointUtils.generateWord(random, codePointSet);
828                words.add(word);
829                final int unigramProbability = random.nextInt(0xFF);
830                unigramProbabilities.put(word, unigramProbability);
831                addUnigramWord(binaryDictionary, word, unigramProbability);
832            }
833
834            for (int j = 0; j < words.size(); j++) {
835                final String word = words.get(j);
836                final int unigramProbability = unigramProbabilities.get(word);
837                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
838            }
839
840            binaryDictionary.flushWithGC();
841            binaryDictionary.close();
842        }
843
844        dictFile.delete();
845    }
846
847    public void testUnigramAndBigramCount() {
848        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
849            testUnigramAndBigramCount(formatVersion);
850        }
851    }
852
853    private void testUnigramAndBigramCount(final int formatVersion) {
854        final int flashWithGCIterationCount = 10;
855        final int codePointSetSize = 50;
856        final int unigramCountPerIteration = 1000;
857        final int bigramCountPerIteration = 2000;
858        final long seed = System.currentTimeMillis();
859        final Random random = new Random(seed);
860
861        File dictFile = null;
862        try {
863            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
864        } catch (IOException e) {
865            fail("IOException while writing an initial dictionary : " + e);
866        }
867
868        final ArrayList<String> words = new ArrayList<>();
869        final HashSet<Pair<String, String>> bigrams = new HashSet<>();
870        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
871
872        BinaryDictionary binaryDictionary;
873        for (int i = 0; i < flashWithGCIterationCount; i++) {
874            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
875                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
876                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
877            for (int j = 0; j < unigramCountPerIteration; j++) {
878                final String word = CodePointUtils.generateWord(random, codePointSet);
879                words.add(word);
880                final int unigramProbability = random.nextInt(0xFF);
881                addUnigramWord(binaryDictionary, word, unigramProbability);
882            }
883            for (int j = 0; j < bigramCountPerIteration; j++) {
884                final String word0 = words.get(random.nextInt(words.size()));
885                final String word1 = words.get(random.nextInt(words.size()));
886                if (TextUtils.equals(word0, word1)) {
887                    continue;
888                }
889                bigrams.add(new Pair<>(word0, word1));
890                final int bigramProbability = random.nextInt(0xF);
891                addBigramWords(binaryDictionary, word0, word1, bigramProbability);
892            }
893            assertEquals(new HashSet<>(words).size(), Integer.parseInt(
894                    binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
895            assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
896                    binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
897            binaryDictionary.flushWithGC();
898            assertEquals(new HashSet<>(words).size(), Integer.parseInt(
899                    binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
900            assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
901                    binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
902            binaryDictionary.close();
903        }
904
905        dictFile.delete();
906    }
907
908    public void testAddMultipleDictionaryEntries() {
909        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
910            testAddMultipleDictionaryEntries(formatVersion);
911        }
912    }
913
914    private void testAddMultipleDictionaryEntries(final int formatVersion) {
915        final int codePointSetSize = 20;
916        final int lmParamCount = 1000;
917        final double bigramContinueRate = 0.9;
918        final long seed = System.currentTimeMillis();
919        final Random random = new Random(seed);
920
921        File dictFile = null;
922        try {
923            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
924        } catch (IOException e) {
925            fail("IOException while writing an initial dictionary : " + e);
926        }
927
928        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
929        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
930        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
931
932        final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
933        String prevWord = null;
934        for (int i = 0; i < languageModelParams.length; i++) {
935            final String word = CodePointUtils.generateWord(random, codePointSet);
936            final int probability = random.nextInt(0xFF);
937            final int bigramProbability = probability + random.nextInt(0xFF - probability);
938            unigramProbabilities.put(word, probability);
939            if (prevWord == null) {
940                languageModelParams[i] = new LanguageModelParam(word, probability,
941                        BinaryDictionary.NOT_A_VALID_TIMESTAMP);
942            } else {
943                languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
944                        bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
945                bigramProbabilities.put(new Pair<>(prevWord, word),
946                        bigramProbability);
947            }
948            prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
949        }
950
951        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
952                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
953                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
954        binaryDictionary.addMultipleDictionaryEntries(languageModelParams);
955
956        for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
957            assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey()));
958        }
959
960        for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
961            final String word0 = entry.getKey().first;
962            final String word1 = entry.getKey().second;
963            final int bigramProbability = entry.getValue();
964            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
965                    isValidBigram(binaryDictionary, word0, word1));
966            if (canCheckBigramProbability(formatVersion)) {
967                assertEquals(bigramProbability,
968                        getBigramProbability(binaryDictionary, word0, word1));
969            }
970        }
971    }
972
973    public void testGetWordProperties() {
974        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
975            testGetWordProperties(formatVersion);
976        }
977    }
978
979    private void testGetWordProperties(final int formatVersion) {
980        final long seed = System.currentTimeMillis();
981        final Random random = new Random(seed);
982        final int UNIGRAM_COUNT = 1000;
983        final int BIGRAM_COUNT = 1000;
984        final int codePointSetSize = 20;
985        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
986
987        File dictFile = null;
988        try {
989            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
990        } catch (IOException e) {
991            fail("IOException while writing an initial dictionary : " + e);
992        }
993        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
994                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
995                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
996
997        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
998                false /* isBeginningOfSentence */);
999        assertFalse(invalidWordProperty.isValid());
1000
1001        final ArrayList<String> words = new ArrayList<>();
1002        final HashMap<String, Integer> wordProbabilities = new HashMap<>();
1003        final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
1004        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
1005
1006        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1007            final String word = CodePointUtils.generateWord(random, codePointSet);
1008            final int unigramProbability = random.nextInt(0xFF);
1009            final boolean isNotAWord = random.nextBoolean();
1010            final boolean isBlacklisted = random.nextBoolean();
1011            // TODO: Add tests for historical info.
1012            binaryDictionary.addUnigramEntry(word, unigramProbability,
1013                    null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
1014                    false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
1015                    BinaryDictionary.NOT_A_VALID_TIMESTAMP);
1016            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1017                binaryDictionary.flushWithGC();
1018            }
1019            words.add(word);
1020            wordProbabilities.put(word, unigramProbability);
1021            final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
1022                    false /* isBeginningOfSentence */);
1023            assertEquals(word, wordProperty.mWord);
1024            assertTrue(wordProperty.isValid());
1025            assertEquals(isNotAWord, wordProperty.mIsNotAWord);
1026            assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
1027            assertEquals(false, wordProperty.mHasBigrams);
1028            assertEquals(false, wordProperty.mHasShortcuts);
1029            assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
1030            assertTrue(wordProperty.mShortcutTargets.isEmpty());
1031        }
1032
1033        for (int i = 0; i < BIGRAM_COUNT; i++) {
1034            final int word0Index = random.nextInt(wordProbabilities.size());
1035            final int word1Index = random.nextInt(wordProbabilities.size());
1036            if (word0Index == word1Index) {
1037                continue;
1038            }
1039            final String word0 = words.get(word0Index);
1040            final String word1 = words.get(word1Index);
1041            final int unigramProbability = wordProbabilities.get(word1);
1042            final int bigramProbability =
1043                    unigramProbability + random.nextInt(0xFF - unigramProbability);
1044            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1045            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1046                binaryDictionary.flushWithGC();
1047            }
1048            if (!bigrams.containsKey(word0)) {
1049                final HashSet<String> bigramWord1s = new HashSet<>();
1050                bigrams.put(word0, bigramWord1s);
1051            }
1052            bigrams.get(word0).add(word1);
1053            bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability);
1054        }
1055
1056        for (int i = 0; i < words.size(); i++) {
1057            final String word0 = words.get(i);
1058            if (!bigrams.containsKey(word0)) {
1059                continue;
1060            }
1061            final HashSet<String> bigramWord1s = bigrams.get(word0);
1062            final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
1063                    false /* isBeginningOfSentence */);
1064            assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
1065            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
1066                final String word1 = wordProperty.mBigrams.get(j).mWord;
1067                assertTrue(bigramWord1s.contains(word1));
1068                if (canCheckBigramProbability(formatVersion)) {
1069                    final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
1070                    assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
1071                }
1072            }
1073        }
1074    }
1075
1076    public void testIterateAllWords() {
1077        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1078            testIterateAllWords(formatVersion);
1079        }
1080    }
1081
1082    private void testIterateAllWords(final int formatVersion) {
1083        final long seed = System.currentTimeMillis();
1084        final Random random = new Random(seed);
1085        final int UNIGRAM_COUNT = 1000;
1086        final int BIGRAM_COUNT = 1000;
1087        final int codePointSetSize = 20;
1088        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1089
1090        File dictFile = null;
1091        try {
1092            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1093        } catch (IOException e) {
1094            fail("IOException while writing an initial dictionary : " + e);
1095        }
1096        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1097                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1098                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1099
1100        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
1101                false /* isBeginningOfSentence */);
1102        assertFalse(invalidWordProperty.isValid());
1103
1104        final ArrayList<String> words = new ArrayList<>();
1105        final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>();
1106        final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
1107        final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
1108                new HashMap<>();
1109
1110        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1111            final String word = CodePointUtils.generateWord(random, codePointSet);
1112            final int unigramProbability = random.nextInt(0xFF);
1113            addUnigramWord(binaryDictionary, word, unigramProbability);
1114            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1115                binaryDictionary.flushWithGC();
1116            }
1117            words.add(word);
1118            wordProbabilitiesToCheckLater.put(word, unigramProbability);
1119        }
1120
1121        for (int i = 0; i < BIGRAM_COUNT; i++) {
1122            final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1123            final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1124            if (word0Index == word1Index) {
1125                continue;
1126            }
1127            final String word0 = words.get(word0Index);
1128            final String word1 = words.get(word1Index);
1129            final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
1130            final int bigramProbability =
1131                    unigramProbability + random.nextInt(0xFF - unigramProbability);
1132            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1133            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1134                binaryDictionary.flushWithGC();
1135            }
1136            if (!bigrams.containsKey(word0)) {
1137                final HashSet<String> bigramWord1s = new HashSet<>();
1138                bigrams.put(word0, bigramWord1s);
1139            }
1140            bigrams.get(word0).add(word1);
1141            bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability);
1142        }
1143
1144        final HashSet<String> wordSet = new HashSet<>(words);
1145        final HashSet<Pair<String, String>> bigramSet =
1146                new HashSet<>(bigramProbabilitiesToCheckLater.keySet());
1147        int token = 0;
1148        do {
1149            final BinaryDictionary.GetNextWordPropertyResult result =
1150                    binaryDictionary.getNextWordProperty(token);
1151            final WordProperty wordProperty = result.mWordProperty;
1152            final String word0 = wordProperty.mWord;
1153            assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
1154                    wordProperty.mProbabilityInfo.mProbability);
1155            wordSet.remove(word0);
1156            final HashSet<String> bigramWord1s = bigrams.get(word0);
1157            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
1158                final String word1 = wordProperty.mBigrams.get(j).mWord;
1159                assertTrue(bigramWord1s.contains(word1));
1160                final Pair<String, String> bigram = new Pair<>(word0, word1);
1161                if (canCheckBigramProbability(formatVersion)) {
1162                    final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
1163                    assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
1164                }
1165                bigramSet.remove(bigram);
1166            }
1167            token = result.mNextToken;
1168        } while (token != 0);
1169        assertTrue(wordSet.isEmpty());
1170        assertTrue(bigramSet.isEmpty());
1171    }
1172
1173    public void testAddShortcuts() {
1174        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1175            testAddShortcuts(formatVersion);
1176        }
1177    }
1178
1179    private void testAddShortcuts(final int formatVersion) {
1180        File dictFile = null;
1181        try {
1182            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1183        } catch (IOException e) {
1184            fail("IOException while writing an initial dictionary : " + e);
1185        }
1186        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1187                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1188                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1189
1190        final int unigramProbability = 100;
1191        final int shortcutProbability = 10;
1192        binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
1193                shortcutProbability, false /* isBeginningOfSentence */,
1194                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
1195        WordProperty wordProperty = binaryDictionary.getWordProperty("aaa",
1196                false /* isBeginningOfSentence */);
1197        assertEquals(1, wordProperty.mShortcutTargets.size());
1198        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1199        assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
1200        final int updatedShortcutProbability = 2;
1201        binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
1202                updatedShortcutProbability, false /* isBeginningOfSentence */,
1203                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
1204        wordProperty = binaryDictionary.getWordProperty("aaa",
1205                false /* isBeginningOfSentence */);
1206        assertEquals(1, wordProperty.mShortcutTargets.size());
1207        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1208        assertEquals(updatedShortcutProbability,
1209                wordProperty.mShortcutTargets.get(0).getProbability());
1210        binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
1211                shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
1212                false /* isBlacklisted */, 0 /* timestamp */);
1213        final HashMap<String, Integer> shortcutTargets = new HashMap<>();
1214        shortcutTargets.put("zzz", updatedShortcutProbability);
1215        shortcutTargets.put("yyy", shortcutProbability);
1216        wordProperty = binaryDictionary.getWordProperty("aaa",
1217                false /* isBeginningOfSentence */);
1218        assertEquals(2, wordProperty.mShortcutTargets.size());
1219        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1220            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1221            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1222                    shortcutTarget.getProbability());
1223            shortcutTargets.remove(shortcutTarget.mWord);
1224        }
1225        shortcutTargets.put("zzz", updatedShortcutProbability);
1226        shortcutTargets.put("yyy", shortcutProbability);
1227        binaryDictionary.flushWithGC();
1228        wordProperty = binaryDictionary.getWordProperty("aaa",
1229                false /* isBeginningOfSentence */);
1230        assertEquals(2, wordProperty.mShortcutTargets.size());
1231        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1232            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1233            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1234                    shortcutTarget.getProbability());
1235            shortcutTargets.remove(shortcutTarget.mWord);
1236        }
1237    }
1238
1239    public void testAddManyShortcuts() {
1240        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1241            testAddManyShortcuts(formatVersion);
1242        }
1243    }
1244
1245    private void testAddManyShortcuts(final int formatVersion) {
1246        final long seed = System.currentTimeMillis();
1247        final Random random = new Random(seed);
1248        final int UNIGRAM_COUNT = 1000;
1249        final int SHORTCUT_COUNT = 10000;
1250        final int codePointSetSize = 20;
1251        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1252
1253        final ArrayList<String> words = new ArrayList<>();
1254        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
1255        final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>();
1256
1257        File dictFile = null;
1258        try {
1259            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1260        } catch (IOException e) {
1261            fail("IOException while writing an initial dictionary : " + e);
1262        }
1263        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1264                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1265                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1266
1267        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1268            final String word = CodePointUtils.generateWord(random, codePointSet);
1269            final int unigramProbability = random.nextInt(0xFF);
1270            addUnigramWord(binaryDictionary, word, unigramProbability);
1271            words.add(word);
1272            unigramProbabilities.put(word, unigramProbability);
1273            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1274                binaryDictionary.flushWithGC();
1275            }
1276        }
1277        for (int i = 0; i < SHORTCUT_COUNT; i++) {
1278            final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
1279            final int shortcutProbability = random.nextInt(0xF);
1280            final String word = words.get(random.nextInt(words.size()));
1281            final int unigramProbability = unigramProbabilities.get(word);
1282            binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
1283                    shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
1284                    false /* isBlacklisted */, 0 /* timestamp */);
1285            if (shortcutTargets.containsKey(word)) {
1286                final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
1287                shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1288            } else {
1289                final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>();
1290                shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1291                shortcutTargets.put(word, shortcutTargetsOfWord);
1292            }
1293            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1294                binaryDictionary.flushWithGC();
1295            }
1296        }
1297
1298        for (final String word : words) {
1299            final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
1300                    false /* isBeginningOfSentence */);
1301            assertEquals((int)unigramProbabilities.get(word),
1302                    wordProperty.mProbabilityInfo.mProbability);
1303            if (!shortcutTargets.containsKey(word)) {
1304                // The word does not have shortcut targets.
1305                continue;
1306            }
1307            assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
1308            for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1309                final String targetCodePonts = shortcutTarget.mWord;
1310                assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
1311                        shortcutTarget.getProbability());
1312            }
1313        }
1314    }
1315
1316    public void testDictMigration() {
1317        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1318            testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
1319        }
1320    }
1321
1322    private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
1323        File dictFile = null;
1324        try {
1325            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
1326        } catch (IOException e) {
1327            fail("IOException while writing an initial dictionary : " + e);
1328        }
1329        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1330                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1331                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1332        final int unigramProbability = 100;
1333        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
1334        addUnigramWord(binaryDictionary, "bbb", unigramProbability);
1335        final int bigramProbability = 150;
1336        addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
1337        final int shortcutProbability = 10;
1338        binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
1339                false /* isBeginningOfSentence */, false /* isNotAWord */,
1340                false /* isBlacklisted */, 0 /* timestamp */);
1341        binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
1342                Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
1343                true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
1344        binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE,
1345                "aaa", bigramProbability, 0 /* timestamp */);
1346        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
1347        assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
1348        assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
1349        assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
1350        assertTrue(binaryDictionary.migrateTo(toFormatVersion));
1351        assertTrue(binaryDictionary.isValidDictionary());
1352        assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
1353        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
1354        assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
1355        if (canCheckBigramProbability(toFormatVersion)) {
1356            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
1357            assertEquals(bigramProbability, binaryDictionary.getNgramProbability(
1358                    PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa"));
1359        }
1360        assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
1361        WordProperty wordProperty = binaryDictionary.getWordProperty("ccc",
1362                false /* isBeginningOfSentence */);
1363        assertEquals(1, wordProperty.mShortcutTargets.size());
1364        assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord);
1365        wordProperty = binaryDictionary.getWordProperty("ddd",
1366                false /* isBeginningOfSentence */);
1367        assertTrue(wordProperty.mIsBlacklistEntry);
1368        assertTrue(wordProperty.mIsNotAWord);
1369    }
1370
1371    public void testLargeDictMigration() {
1372        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1373            testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
1374        }
1375    }
1376
1377    private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) {
1378        final int UNIGRAM_COUNT = 3000;
1379        final int BIGRAM_COUNT = 3000;
1380        final int codePointSetSize = 50;
1381        final long seed = System.currentTimeMillis();
1382        final Random random = new Random(seed);
1383
1384        File dictFile = null;
1385        try {
1386            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
1387        } catch (IOException e) {
1388            fail("IOException while writing an initial dictionary : " + e);
1389        }
1390        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1391                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1392                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1393
1394        final ArrayList<String> words = new ArrayList<>();
1395        final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
1396        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1397        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
1398        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
1399
1400        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1401            final String word = CodePointUtils.generateWord(random, codePointSet);
1402            final int unigramProbability = random.nextInt(0xFF);
1403            addUnigramWord(binaryDictionary, word, unigramProbability);
1404            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1405                binaryDictionary.flushWithGC();
1406            }
1407            words.add(word);
1408            unigramProbabilities.put(word, unigramProbability);
1409        }
1410
1411        for (int i = 0; i < BIGRAM_COUNT; i++) {
1412            final int word0Index = random.nextInt(words.size());
1413            final int word1Index = random.nextInt(words.size());
1414            if (word0Index == word1Index) {
1415                continue;
1416            }
1417            final String word0 = words.get(word0Index);
1418            final String word1 = words.get(word1Index);
1419            final int unigramProbability = unigramProbabilities.get(word1);
1420            final int bigramProbability =
1421                    random.nextInt(0xFF - unigramProbability) + unigramProbability;
1422            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1423            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1424                binaryDictionary.flushWithGC();
1425            }
1426            final Pair<String, String> bigram = new Pair<>(word0, word1);
1427            bigrams.add(bigram);
1428            bigramProbabilities.put(bigram, bigramProbability);
1429        }
1430        assertTrue(binaryDictionary.migrateTo(toFormatVersion));
1431
1432        for (final String word : words) {
1433            assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
1434        }
1435        assertEquals(unigramProbabilities.size(), Integer.parseInt(
1436                binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
1437
1438        for (final Pair<String, String> bigram : bigrams) {
1439            if (canCheckBigramProbability(toFormatVersion)) {
1440                assertEquals((int)bigramProbabilities.get(bigram),
1441                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
1442            }
1443            assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
1444        }
1445        assertEquals(bigramProbabilities.size(), Integer.parseInt(
1446                binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
1447    }
1448
1449    public void testBeginningOfSentence() {
1450        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1451            if (supportsBeginningOfSentence(formatVersion)) {
1452                testBeginningOfSentence(formatVersion);
1453            }
1454        }
1455    }
1456
1457    private void testBeginningOfSentence(final int formatVersion) {
1458        File dictFile = null;
1459        try {
1460            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1461        } catch (IOException e) {
1462            fail("IOException while writing an initial dictionary : " + e);
1463        }
1464        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1465                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1466                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1467        final int dummyProbability = 0;
1468        final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
1469        final int bigramProbability = 200;
1470        addUnigramWord(binaryDictionary, "aaa", dummyProbability);
1471        binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
1472                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1473        assertEquals(bigramProbability,
1474                binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
1475        binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
1476                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1477        addUnigramWord(binaryDictionary, "bbb", dummyProbability);
1478        binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability,
1479                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1480        binaryDictionary.flushWithGC();
1481        assertEquals(bigramProbability,
1482                binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
1483        assertEquals(bigramProbability,
1484                binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb"));
1485    }
1486
1487    public void testGetMaxFrequencyOfExactMatches() {
1488        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1489            testGetMaxFrequencyOfExactMatches(formatVersion);
1490        }
1491    }
1492
1493    private void testGetMaxFrequencyOfExactMatches(final int formatVersion) {
1494        File dictFile = null;
1495        try {
1496            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1497        } catch (IOException e) {
1498            fail("IOException while writing an initial dictionary : " + e);
1499        }
1500        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1501                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1502                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1503        addUnigramWord(binaryDictionary, "abc", 10);
1504        addUnigramWord(binaryDictionary, "aBc", 15);
1505        assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1506        addUnigramWord(binaryDictionary, "ab'c", 20);
1507        assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1508        addUnigramWord(binaryDictionary, "a-b-c", 25);
1509        assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1510        addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30);
1511        assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1512        addUnigramWord(binaryDictionary, "ab c", 255);
1513        assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1514    }
1515}
1516