BinaryDictionaryTests.java revision d9b8602f4862c2c876e1499aad7ca7d77ea66595
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.test.AndroidTestCase;
20import android.test.suitebuilder.annotation.LargeTest;
21import android.text.TextUtils;
22import android.util.Pair;
23
24import com.android.inputmethod.latin.makedict.CodePointUtils;
25import com.android.inputmethod.latin.makedict.FormatSpec;
26import com.android.inputmethod.latin.makedict.WeightedString;
27import com.android.inputmethod.latin.makedict.WordProperty;
28import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
29import com.android.inputmethod.latin.utils.FileUtils;
30import com.android.inputmethod.latin.utils.LanguageModelParam;
31
32import java.io.File;
33import java.io.IOException;
34import java.util.ArrayList;
35import java.util.HashMap;
36import java.util.HashSet;
37import java.util.Locale;
38import java.util.Map;
39import java.util.Random;
40
41// TODO Use the seed passed as an argument for makedict test.
42@LargeTest
43public class BinaryDictionaryTests extends AndroidTestCase {
44    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
45    private static final String TEST_LOCALE = "test";
46    private static final int[] DICT_FORMAT_VERSIONS =
47            new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
48
49    private static boolean canCheckBigramProbability(final int formatVersion) {
50        return formatVersion > FormatSpec.VERSION401;
51    }
52
53    private static boolean supportsBeginningOfSentence(final int formatVersion) {
54        return formatVersion > FormatSpec.VERSION401;
55    }
56
57    private File createEmptyDictionaryAndGetFile(final String dictId,
58            final int formatVersion) throws IOException {
59        if (formatVersion == FormatSpec.VERSION4
60                || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
61                || formatVersion == FormatSpec.VERSION4_DEV) {
62            return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
63        } else {
64            throw new IOException("Dictionary format version " + formatVersion
65                    + " is not supported.");
66        }
67    }
68
69    private File createEmptyVer4DictionaryAndGetFile(final String dictId,
70            final int formatVersion) throws IOException {
71        final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
72                getContext().getCacheDir());
73        file.delete();
74        file.mkdir();
75        Map<String, String> attributeMap = new HashMap<>();
76        if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
77                Locale.ENGLISH, attributeMap)) {
78            return file;
79        } else {
80            throw new IOException("Empty dictionary " + file.getAbsolutePath()
81                    + " cannot be created. Format version: " + formatVersion);
82        }
83    }
84
85    public void testIsValidDictionary() {
86        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
87            testIsValidDictionary(formatVersion);
88        }
89    }
90
91    private void testIsValidDictionary(final int formatVersion) {
92        File dictFile = null;
93        try {
94            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
95        } catch (IOException e) {
96            fail("IOException while writing an initial dictionary : " + e);
97        }
98        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
99                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
100                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
101        assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
102                binaryDictionary.isValidDictionary());
103        binaryDictionary.close();
104        assertFalse("binaryDictionary must be invalid after closing.",
105                binaryDictionary.isValidDictionary());
106        FileUtils.deleteRecursively(dictFile);
107        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
108                dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
109                TEST_LOCALE, true /* isUpdatable */);
110        assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
111                binaryDictionary.isValidDictionary());
112        binaryDictionary.close();
113    }
114
115    public void testConstructingDictionaryOnMemory() {
116        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
117            testConstructingDictionaryOnMemory(formatVersion);
118        }
119    }
120
121    private void testConstructingDictionaryOnMemory(final int formatVersion) {
122        File dictFile = null;
123        try {
124            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
125        } catch (IOException e) {
126            fail("IOException while writing an initial dictionary : " + e);
127        }
128        FileUtils.deleteRecursively(dictFile);
129        assertFalse(dictFile.exists());
130        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
131                true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
132                new HashMap<String, String>());
133        assertTrue(binaryDictionary.isValidDictionary());
134        assertEquals(formatVersion, binaryDictionary.getFormatVersion());
135        final int probability = 100;
136        addUnigramWord(binaryDictionary, "word", probability);
137        assertEquals(probability, binaryDictionary.getFrequency("word"));
138        assertFalse(dictFile.exists());
139        binaryDictionary.flush();
140        assertTrue(dictFile.exists());
141        assertTrue(binaryDictionary.isValidDictionary());
142        assertEquals(formatVersion, binaryDictionary.getFormatVersion());
143        assertEquals(probability, binaryDictionary.getFrequency("word"));
144        binaryDictionary.close();
145        dictFile.delete();
146    }
147
148    public void testAddTooLongWord() {
149        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
150            testAddTooLongWord(formatVersion);
151        }
152    }
153
154    private void testAddTooLongWord(final int formatVersion) {
155        File dictFile = null;
156        try {
157            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
158        } catch (IOException e) {
159            fail("IOException while writing an initial dictionary : " + e);
160        }
161        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
162                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
163                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
164
165        final StringBuffer stringBuilder = new StringBuffer();
166        for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
167            stringBuilder.append('a');
168        }
169        final String validLongWord = stringBuilder.toString();
170        stringBuilder.append('a');
171        final String invalidLongWord = stringBuilder.toString();
172        final int probability = 100;
173        addUnigramWord(binaryDictionary, "aaa", probability);
174        addUnigramWord(binaryDictionary, validLongWord, probability);
175        addUnigramWord(binaryDictionary, invalidLongWord, probability);
176        // Too long short cut.
177        binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
178                10 /* shortcutProbability */, false /* isBeginningOfSentence */,
179                false /* isNotAWord */, false /* isBlacklisted */,
180                BinaryDictionary.NOT_A_VALID_TIMESTAMP);
181        addUnigramWord(binaryDictionary, "abc", probability);
182        final int updatedProbability = 200;
183        // Update.
184        addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
185        addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
186        addUnigramWord(binaryDictionary, "abc", updatedProbability);
187
188        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
189        assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
190        assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
191                binaryDictionary.getFrequency(invalidLongWord));
192        assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
193        dictFile.delete();
194    }
195
196    private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
197            final int probability) {
198        binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
199                BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
200                false /* isBeginningOfSentence */, false /* isNotAWord */,
201                false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
202    }
203
204    private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
205            final String word1, final int probability) {
206        binaryDictionary.addNgramEntry(new PrevWordsInfo(word0), word1, probability,
207                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
208    }
209
210    private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
211            final String word0, final String word1) {
212        return binaryDictionary.isValidNgram(new PrevWordsInfo(word0), word1);
213    }
214
215    private static void removeBigramEntry(final BinaryDictionary binaryDictionary,
216            final String word0, final String word1) {
217        binaryDictionary.removeNgramEntry(new PrevWordsInfo(word0), word1);
218    }
219
220    private static int getBigramProbability(final BinaryDictionary binaryDictionary,
221            final String word0,  final String word1) {
222        return binaryDictionary.getNgramProbability(new PrevWordsInfo(word0), word1);
223    }
224
225    public void testAddUnigramWord() {
226        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
227            testAddUnigramWord(formatVersion);
228        }
229    }
230
231    private void testAddUnigramWord(final int formatVersion) {
232        File dictFile = null;
233        try {
234            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
235        } catch (IOException e) {
236            fail("IOException while writing an initial dictionary : " + e);
237        }
238        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
239                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
240                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
241
242        final int probability = 100;
243        addUnigramWord(binaryDictionary, "aaa", probability);
244        // Reallocate and create.
245        addUnigramWord(binaryDictionary, "aab", probability);
246        // Insert into children.
247        addUnigramWord(binaryDictionary, "aac", probability);
248        // Make terminal.
249        addUnigramWord(binaryDictionary, "aa", probability);
250        // Create children.
251        addUnigramWord(binaryDictionary, "aaaa", probability);
252        // Reallocate and make termianl.
253        addUnigramWord(binaryDictionary, "a", probability);
254
255        final int updatedProbability = 200;
256        // Update.
257        addUnigramWord(binaryDictionary, "aaa", updatedProbability);
258
259        assertEquals(probability, binaryDictionary.getFrequency("aab"));
260        assertEquals(probability, binaryDictionary.getFrequency("aac"));
261        assertEquals(probability, binaryDictionary.getFrequency("aa"));
262        assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
263        assertEquals(probability, binaryDictionary.getFrequency("a"));
264        assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
265
266        dictFile.delete();
267    }
268
269    public void testRandomlyAddUnigramWord() {
270        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
271            testRandomlyAddUnigramWord(formatVersion);
272        }
273    }
274
275    private void testRandomlyAddUnigramWord(final int formatVersion) {
276        final int wordCount = 1000;
277        final int codePointSetSize = 50;
278        final long seed = System.currentTimeMillis();
279
280        File dictFile = null;
281        try {
282            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
283        } catch (IOException e) {
284            fail("IOException while writing an initial dictionary : " + e);
285        }
286        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
287                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
288                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
289
290        final HashMap<String, Integer> probabilityMap = new HashMap<>();
291        // Test a word that isn't contained within the dictionary.
292        final Random random = new Random(seed);
293        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
294        for (int i = 0; i < wordCount; ++i) {
295            final String word = CodePointUtils.generateWord(random, codePointSet);
296            probabilityMap.put(word, random.nextInt(0xFF));
297        }
298        for (String word : probabilityMap.keySet()) {
299            addUnigramWord(binaryDictionary, word, probabilityMap.get(word));
300        }
301        for (String word : probabilityMap.keySet()) {
302            assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
303        }
304        dictFile.delete();
305    }
306
307    public void testAddBigramWords() {
308        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
309            testAddBigramWords(formatVersion);
310        }
311    }
312
313    private void testAddBigramWords(final int formatVersion) {
314        File dictFile = null;
315        try {
316            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
317        } catch (IOException e) {
318            fail("IOException while writing an initial dictionary : " + e);
319        }
320        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
321                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
322                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
323
324        final int unigramProbability = 100;
325        final int bigramProbability = 150;
326        final int updatedBigramProbability = 200;
327        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
328        addUnigramWord(binaryDictionary, "abb", unigramProbability);
329        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
330        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
331        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
332        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
333        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
334
335        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
336        assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
337        assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
338        assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
339        if (canCheckBigramProbability(formatVersion)) {
340            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
341            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
342            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
343            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
344        }
345
346        addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
347        if (canCheckBigramProbability(formatVersion)) {
348            assertEquals(updatedBigramProbability,
349                    getBigramProbability(binaryDictionary, "aaa", "abb"));
350        }
351
352        assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
353        assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
354        assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
355        assertEquals(Dictionary.NOT_A_PROBABILITY,
356                getBigramProbability(binaryDictionary, "bcc", "aaa"));
357        assertEquals(Dictionary.NOT_A_PROBABILITY,
358                getBigramProbability(binaryDictionary, "bcc", "bbc"));
359        assertEquals(Dictionary.NOT_A_PROBABILITY,
360                getBigramProbability(binaryDictionary, "aaa", "aaa"));
361
362        // Testing bigram link.
363        addUnigramWord(binaryDictionary, "abcde", unigramProbability);
364        addUnigramWord(binaryDictionary, "fghij", unigramProbability);
365        addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability);
366        addUnigramWord(binaryDictionary, "fgh", unigramProbability);
367        addUnigramWord(binaryDictionary, "abc", unigramProbability);
368        addUnigramWord(binaryDictionary, "f", unigramProbability);
369
370        if (canCheckBigramProbability(formatVersion)) {
371            assertEquals(bigramProbability,
372                    getBigramProbability(binaryDictionary, "abcde", "fghij"));
373        }
374        assertEquals(Dictionary.NOT_A_PROBABILITY,
375                getBigramProbability(binaryDictionary, "abcde", "fgh"));
376        addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
377        if (canCheckBigramProbability(formatVersion)) {
378            assertEquals(updatedBigramProbability,
379                    getBigramProbability(binaryDictionary, "abcde", "fghij"));
380        }
381
382        dictFile.delete();
383    }
384
385    public void testRandomlyAddBigramWords() {
386        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
387            testRandomlyAddBigramWords(formatVersion);
388        }
389    }
390
391    private void testRandomlyAddBigramWords(final int formatVersion) {
392        final int wordCount = 100;
393        final int bigramCount = 1000;
394        final int codePointSetSize = 50;
395        final long seed = System.currentTimeMillis();
396        final Random random = new Random(seed);
397
398        File dictFile = null;
399        try {
400            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
401        } catch (IOException e) {
402            fail("IOException while writing an initial dictionary : " + e);
403        }
404        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
405                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
406                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
407
408        final ArrayList<String> words = new ArrayList<>();
409        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
410        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
411        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
412        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
413
414        for (int i = 0; i < wordCount; ++i) {
415            final String word = CodePointUtils.generateWord(random, codePointSet);
416            words.add(word);
417            final int unigramProbability = random.nextInt(0xFF);
418            unigramProbabilities.put(word, unigramProbability);
419            addUnigramWord(binaryDictionary, word, unigramProbability);
420        }
421
422        for (int i = 0; i < bigramCount; i++) {
423            final String word0 = words.get(random.nextInt(wordCount));
424            final String word1 = words.get(random.nextInt(wordCount));
425            if (TextUtils.equals(word0, word1)) {
426                continue;
427            }
428            final Pair<String, String> bigram = new Pair<>(word0, word1);
429            bigramWords.add(bigram);
430            final int unigramProbability = unigramProbabilities.get(word1);
431            final int bigramProbability =
432                    unigramProbability + random.nextInt(0xFF - unigramProbability);
433            bigramProbabilities.put(bigram, bigramProbability);
434            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
435        }
436
437        for (final Pair<String, String> bigram : bigramWords) {
438            final int bigramProbability = bigramProbabilities.get(bigram);
439            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
440                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
441            if (canCheckBigramProbability(formatVersion)) {
442                assertEquals(bigramProbability,
443                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
444            }
445        }
446
447        dictFile.delete();
448    }
449
450    public void testRemoveBigramWords() {
451        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
452            testRemoveBigramWords(formatVersion);
453        }
454    }
455
456    private void testRemoveBigramWords(final int formatVersion) {
457        File dictFile = null;
458        try {
459            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
460        } catch (IOException e) {
461            fail("IOException while writing an initial dictionary : " + e);
462        }
463        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
464                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
465                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
466        final int unigramProbability = 100;
467        final int bigramProbability = 150;
468        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
469        addUnigramWord(binaryDictionary, "abb", unigramProbability);
470        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
471        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
472        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
473        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
474        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
475
476        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
477        assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
478        assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
479        assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
480
481        removeBigramEntry(binaryDictionary, "aaa", "abb");
482        assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));
483        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
484        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
485
486
487        removeBigramEntry(binaryDictionary, "aaa", "bcc");
488        assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc"));
489        removeBigramEntry(binaryDictionary, "abb", "aaa");
490        assertFalse(isValidBigram(binaryDictionary, "abb", "aaa"));
491        removeBigramEntry(binaryDictionary, "abb", "bcc");
492        assertFalse(isValidBigram(binaryDictionary, "abb", "bcc"));
493
494        removeBigramEntry(binaryDictionary, "aaa", "abb");
495        // Test remove non-existing bigram operation.
496        removeBigramEntry(binaryDictionary, "aaa", "abb");
497        removeBigramEntry(binaryDictionary, "bcc", "aaa");
498
499        dictFile.delete();
500    }
501
502    public void testFlushDictionary() {
503        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
504            testFlushDictionary(formatVersion);
505        }
506    }
507
508    private void testFlushDictionary(final int formatVersion) {
509        File dictFile = null;
510        try {
511            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
512        } catch (IOException e) {
513            fail("IOException while writing an initial dictionary : " + e);
514        }
515        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
516                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
517                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
518
519        final int probability = 100;
520        addUnigramWord(binaryDictionary, "aaa", probability);
521        addUnigramWord(binaryDictionary, "abcd", probability);
522        // Close without flushing.
523        binaryDictionary.close();
524
525        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
526                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
527                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
528
529        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
530        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
531
532        addUnigramWord(binaryDictionary, "aaa", probability);
533        addUnigramWord(binaryDictionary, "abcd", probability);
534        binaryDictionary.flush();
535        binaryDictionary.close();
536
537        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
538                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
539                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
540
541        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
542        assertEquals(probability, binaryDictionary.getFrequency("abcd"));
543        addUnigramWord(binaryDictionary, "bcde", probability);
544        binaryDictionary.flush();
545        binaryDictionary.close();
546
547        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
548                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
549                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
550        assertEquals(probability, binaryDictionary.getFrequency("bcde"));
551        binaryDictionary.close();
552
553        dictFile.delete();
554    }
555
556    public void testFlushWithGCDictionary() {
557        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
558            testFlushWithGCDictionary(formatVersion);
559        }
560    }
561
562    private void testFlushWithGCDictionary(final int formatVersion) {
563        File dictFile = null;
564        try {
565            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
566        } catch (IOException e) {
567            fail("IOException while writing an initial dictionary : " + e);
568        }
569        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
570                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
571                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
572
573        final int unigramProbability = 100;
574        final int bigramProbability = 150;
575        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
576        addUnigramWord(binaryDictionary, "abb", unigramProbability);
577        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
578        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
579        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
580        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
581        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
582        binaryDictionary.flushWithGC();
583        binaryDictionary.close();
584
585        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
586                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
587                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
588        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
589        assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
590        assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
591        if (canCheckBigramProbability(formatVersion)) {
592            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
593            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
594            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
595            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
596        }
597        assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
598        assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
599        assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
600        binaryDictionary.flushWithGC();
601        binaryDictionary.close();
602
603        dictFile.delete();
604    }
605
606    public void testAddBigramWordsAndFlashWithGC() {
607        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
608            testAddBigramWordsAndFlashWithGC(formatVersion);
609        }
610    }
611
612    // TODO: Evaluate performance of GC
613    private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
614        final int wordCount = 100;
615        final int bigramCount = 1000;
616        final int codePointSetSize = 30;
617        final long seed = System.currentTimeMillis();
618        final Random random = new Random(seed);
619
620        File dictFile = null;
621        try {
622            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
623        } catch (IOException e) {
624            fail("IOException while writing an initial dictionary : " + e);
625        }
626
627        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
628                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
629                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
630
631        final ArrayList<String> words = new ArrayList<>();
632        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
633        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
634        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
635        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
636
637        for (int i = 0; i < wordCount; ++i) {
638            final String word = CodePointUtils.generateWord(random, codePointSet);
639            words.add(word);
640            final int unigramProbability = random.nextInt(0xFF);
641            unigramProbabilities.put(word, unigramProbability);
642            addUnigramWord(binaryDictionary, word, unigramProbability);
643        }
644
645        for (int i = 0; i < bigramCount; i++) {
646            final String word0 = words.get(random.nextInt(wordCount));
647            final String word1 = words.get(random.nextInt(wordCount));
648            if (TextUtils.equals(word0, word1)) {
649                continue;
650            }
651            final Pair<String, String> bigram = new Pair<>(word0, word1);
652            bigramWords.add(bigram);
653            final int unigramProbability = unigramProbabilities.get(word1);
654            final int bigramProbability =
655                    unigramProbability + random.nextInt(0xFF - unigramProbability);
656            bigramProbabilities.put(bigram, bigramProbability);
657            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
658        }
659
660        binaryDictionary.flushWithGC();
661        binaryDictionary.close();
662        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
663                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
664                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
665
666
667        for (final Pair<String, String> bigram : bigramWords) {
668            final int bigramProbability = bigramProbabilities.get(bigram);
669            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
670                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
671            if (canCheckBigramProbability(formatVersion)) {
672                assertEquals(bigramProbability,
673                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
674            }
675        }
676
677        dictFile.delete();
678    }
679
680    public void testRandomOperationsAndFlashWithGC() {
681        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
682            testRandomOperationsAndFlashWithGC(formatVersion);
683        }
684    }
685
686    private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
687        final int flashWithGCIterationCount = 50;
688        final int operationCountInEachIteration = 200;
689        final int initialUnigramCount = 100;
690        final float addUnigramProb = 0.5f;
691        final float addBigramProb = 0.8f;
692        final float removeBigramProb = 0.2f;
693        final int codePointSetSize = 30;
694
695        final long seed = System.currentTimeMillis();
696        final Random random = new Random(seed);
697
698        File dictFile = null;
699        try {
700            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
701        } catch (IOException e) {
702            fail("IOException while writing an initial dictionary : " + e);
703        }
704
705        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
706                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
707                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
708        final ArrayList<String> words = new ArrayList<>();
709        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
710        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
711        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
712        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
713        for (int i = 0; i < initialUnigramCount; ++i) {
714            final String word = CodePointUtils.generateWord(random, codePointSet);
715            words.add(word);
716            final int unigramProbability = random.nextInt(0xFF);
717            unigramProbabilities.put(word, unigramProbability);
718            addUnigramWord(binaryDictionary, word, unigramProbability);
719        }
720        binaryDictionary.flushWithGC();
721        binaryDictionary.close();
722
723        for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
724            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
725                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
726                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
727            for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
728                // Add unigram.
729                if (random.nextFloat() < addUnigramProb) {
730                    final String word = CodePointUtils.generateWord(random, codePointSet);
731                    words.add(word);
732                    final int unigramProbability = random.nextInt(0xFF);
733                    unigramProbabilities.put(word, unigramProbability);
734                    addUnigramWord(binaryDictionary, word, unigramProbability);
735                }
736                // Add bigram.
737                if (random.nextFloat() < addBigramProb && words.size() > 2) {
738                    final int word0Index = random.nextInt(words.size());
739                    int word1Index = random.nextInt(words.size() - 1);
740                    if (word0Index <= word1Index) {
741                        word1Index++;
742                    }
743                    final String word0 = words.get(word0Index);
744                    final String word1 = words.get(word1Index);
745                    if (TextUtils.equals(word0, word1)) {
746                        continue;
747                    }
748                    final int unigramProbability = unigramProbabilities.get(word1);
749                    final int bigramProbability =
750                            unigramProbability + random.nextInt(0xFF - unigramProbability);
751                    final Pair<String, String> bigram = new Pair<>(word0, word1);
752                    bigramWords.add(bigram);
753                    bigramProbabilities.put(bigram, bigramProbability);
754                    addBigramWords(binaryDictionary, word0, word1, bigramProbability);
755                }
756                // Remove bigram.
757                if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
758                    final int bigramIndex = random.nextInt(bigramWords.size());
759                    final Pair<String, String> bigram = bigramWords.get(bigramIndex);
760                    bigramWords.remove(bigramIndex);
761                    bigramProbabilities.remove(bigram);
762                    removeBigramEntry(binaryDictionary, bigram.first, bigram.second);
763                }
764            }
765
766            // Test whether the all unigram operations are collectlly handled.
767            for (int i = 0; i < words.size(); i++) {
768                final String word = words.get(i);
769                final int unigramProbability = unigramProbabilities.get(word);
770                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
771            }
772            // Test whether the all bigram operations are collectlly handled.
773            for (int i = 0; i < bigramWords.size(); i++) {
774                final Pair<String, String> bigram = bigramWords.get(i);
775                final int probability;
776                if (bigramProbabilities.containsKey(bigram)) {
777                    final int bigramProbability = bigramProbabilities.get(bigram);
778                    probability = bigramProbability;
779                } else {
780                    probability = Dictionary.NOT_A_PROBABILITY;
781                }
782
783                if (canCheckBigramProbability(formatVersion)) {
784                    assertEquals(probability,
785                            getBigramProbability(binaryDictionary, bigram.first, bigram.second));
786                }
787                assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
788                        isValidBigram(binaryDictionary, bigram.first, bigram.second));
789            }
790            binaryDictionary.flushWithGC();
791            binaryDictionary.close();
792        }
793
794        dictFile.delete();
795    }
796
797    public void testAddManyUnigramsAndFlushWithGC() {
798        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
799            testAddManyUnigramsAndFlushWithGC(formatVersion);
800        }
801    }
802
803    private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
804        final int flashWithGCIterationCount = 3;
805        final int codePointSetSize = 50;
806
807        final long seed = System.currentTimeMillis();
808        final Random random = new Random(seed);
809
810        File dictFile = null;
811        try {
812            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
813        } catch (IOException e) {
814            fail("IOException while writing an initial dictionary : " + e);
815        }
816
817        final ArrayList<String> words = new ArrayList<>();
818        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
819        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
820
821        BinaryDictionary binaryDictionary;
822        for (int i = 0; i < flashWithGCIterationCount; i++) {
823            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
824                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
825                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
826            while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
827                final String word = CodePointUtils.generateWord(random, codePointSet);
828                words.add(word);
829                final int unigramProbability = random.nextInt(0xFF);
830                unigramProbabilities.put(word, unigramProbability);
831                addUnigramWord(binaryDictionary, word, unigramProbability);
832            }
833
834            for (int j = 0; j < words.size(); j++) {
835                final String word = words.get(j);
836                final int unigramProbability = unigramProbabilities.get(word);
837                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
838            }
839
840            binaryDictionary.flushWithGC();
841            binaryDictionary.close();
842        }
843
844        dictFile.delete();
845    }
846
847    public void testUnigramAndBigramCount() {
848        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
849            testUnigramAndBigramCount(formatVersion);
850        }
851    }
852
853    private void testUnigramAndBigramCount(final int formatVersion) {
854        final int flashWithGCIterationCount = 10;
855        final int codePointSetSize = 50;
856        final int unigramCountPerIteration = 1000;
857        final int bigramCountPerIteration = 2000;
858        final long seed = System.currentTimeMillis();
859        final Random random = new Random(seed);
860
861        File dictFile = null;
862        try {
863            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
864        } catch (IOException e) {
865            fail("IOException while writing an initial dictionary : " + e);
866        }
867
868        final ArrayList<String> words = new ArrayList<>();
869        final HashSet<Pair<String, String>> bigrams = new HashSet<>();
870        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
871
872        BinaryDictionary binaryDictionary;
873        for (int i = 0; i < flashWithGCIterationCount; i++) {
874            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
875                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
876                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
877            for (int j = 0; j < unigramCountPerIteration; j++) {
878                final String word = CodePointUtils.generateWord(random, codePointSet);
879                words.add(word);
880                final int unigramProbability = random.nextInt(0xFF);
881                addUnigramWord(binaryDictionary, word, unigramProbability);
882            }
883            for (int j = 0; j < bigramCountPerIteration; j++) {
884                final String word0 = words.get(random.nextInt(words.size()));
885                final String word1 = words.get(random.nextInt(words.size()));
886                if (TextUtils.equals(word0, word1)) {
887                    continue;
888                }
889                bigrams.add(new Pair<>(word0, word1));
890                final int bigramProbability = random.nextInt(0xF);
891                addBigramWords(binaryDictionary, word0, word1, bigramProbability);
892            }
893            assertEquals(new HashSet<>(words).size(), Integer.parseInt(
894                    binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
895            assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
896                    binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
897            binaryDictionary.flushWithGC();
898            assertEquals(new HashSet<>(words).size(), Integer.parseInt(
899                    binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
900            assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
901                    binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
902            binaryDictionary.close();
903        }
904
905        dictFile.delete();
906    }
907
908    public void testAddMultipleDictionaryEntries() {
909        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
910            testAddMultipleDictionaryEntries(formatVersion);
911        }
912    }
913
914    private void testAddMultipleDictionaryEntries(final int formatVersion) {
915        final int codePointSetSize = 20;
916        final int lmParamCount = 1000;
917        final double bigramContinueRate = 0.9;
918        final long seed = System.currentTimeMillis();
919        final Random random = new Random(seed);
920
921        File dictFile = null;
922        try {
923            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
924        } catch (IOException e) {
925            fail("IOException while writing an initial dictionary : " + e);
926        }
927
928        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
929        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
930        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
931
932        final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
933        String prevWord = null;
934        for (int i = 0; i < languageModelParams.length; i++) {
935            final String word = CodePointUtils.generateWord(random, codePointSet);
936            final int probability = random.nextInt(0xFF);
937            final int bigramProbability = probability + random.nextInt(0xFF - probability);
938            unigramProbabilities.put(word, probability);
939            if (prevWord == null) {
940                languageModelParams[i] = new LanguageModelParam(word, probability,
941                        BinaryDictionary.NOT_A_VALID_TIMESTAMP);
942            } else {
943                languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
944                        bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
945                bigramProbabilities.put(new Pair<>(prevWord, word),
946                        bigramProbability);
947            }
948            prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
949        }
950
951        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
952                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
953                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
954        binaryDictionary.addMultipleDictionaryEntries(languageModelParams);
955
956        for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
957            assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey()));
958        }
959
960        for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
961            final String word0 = entry.getKey().first;
962            final String word1 = entry.getKey().second;
963            final int bigramProbability = entry.getValue();
964            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
965                    isValidBigram(binaryDictionary, word0, word1));
966            if (canCheckBigramProbability(formatVersion)) {
967                assertEquals(bigramProbability,
968                        getBigramProbability(binaryDictionary, word0, word1));
969            }
970        }
971    }
972
973    public void testGetWordProperties() {
974        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
975            testGetWordProperties(formatVersion);
976        }
977    }
978
979    private void testGetWordProperties(final int formatVersion) {
980        final long seed = System.currentTimeMillis();
981        final Random random = new Random(seed);
982        final int UNIGRAM_COUNT = 1000;
983        final int BIGRAM_COUNT = 1000;
984        final int codePointSetSize = 20;
985        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
986
987        File dictFile = null;
988        try {
989            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
990        } catch (IOException e) {
991            fail("IOException while writing an initial dictionary : " + e);
992        }
993        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
994                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
995                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
996
997        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
998        assertFalse(invalidWordProperty.isValid());
999
1000        final ArrayList<String> words = new ArrayList<>();
1001        final HashMap<String, Integer> wordProbabilities = new HashMap<>();
1002        final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
1003        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
1004
1005        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1006            final String word = CodePointUtils.generateWord(random, codePointSet);
1007            final int unigramProbability = random.nextInt(0xFF);
1008            final boolean isNotAWord = random.nextBoolean();
1009            final boolean isBlacklisted = random.nextBoolean();
1010            // TODO: Add tests for historical info.
1011            binaryDictionary.addUnigramEntry(word, unigramProbability,
1012                    null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
1013                    false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
1014                    BinaryDictionary.NOT_A_VALID_TIMESTAMP);
1015            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1016                binaryDictionary.flushWithGC();
1017            }
1018            words.add(word);
1019            wordProbabilities.put(word, unigramProbability);
1020            final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
1021            assertEquals(word, wordProperty.mWord);
1022            assertTrue(wordProperty.isValid());
1023            assertEquals(isNotAWord, wordProperty.mIsNotAWord);
1024            assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
1025            assertEquals(false, wordProperty.mHasBigrams);
1026            assertEquals(false, wordProperty.mHasShortcuts);
1027            assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
1028            assertTrue(wordProperty.mShortcutTargets.isEmpty());
1029        }
1030
1031        for (int i = 0; i < BIGRAM_COUNT; i++) {
1032            final int word0Index = random.nextInt(wordProbabilities.size());
1033            final int word1Index = random.nextInt(wordProbabilities.size());
1034            if (word0Index == word1Index) {
1035                continue;
1036            }
1037            final String word0 = words.get(word0Index);
1038            final String word1 = words.get(word1Index);
1039            final int unigramProbability = wordProbabilities.get(word1);
1040            final int bigramProbability =
1041                    unigramProbability + random.nextInt(0xFF - unigramProbability);
1042            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1043            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1044                binaryDictionary.flushWithGC();
1045            }
1046            if (!bigrams.containsKey(word0)) {
1047                final HashSet<String> bigramWord1s = new HashSet<>();
1048                bigrams.put(word0, bigramWord1s);
1049            }
1050            bigrams.get(word0).add(word1);
1051            bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability);
1052        }
1053
1054        for (int i = 0; i < words.size(); i++) {
1055            final String word0 = words.get(i);
1056            if (!bigrams.containsKey(word0)) {
1057                continue;
1058            }
1059            final HashSet<String> bigramWord1s = bigrams.get(word0);
1060            final WordProperty wordProperty = binaryDictionary.getWordProperty(word0);
1061            assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
1062            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
1063                final String word1 = wordProperty.mBigrams.get(j).mWord;
1064                assertTrue(bigramWord1s.contains(word1));
1065                if (canCheckBigramProbability(formatVersion)) {
1066                    final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
1067                    assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
1068                }
1069            }
1070        }
1071    }
1072
1073    public void testIterateAllWords() {
1074        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1075            testIterateAllWords(formatVersion);
1076        }
1077    }
1078
1079    private void testIterateAllWords(final int formatVersion) {
1080        final long seed = System.currentTimeMillis();
1081        final Random random = new Random(seed);
1082        final int UNIGRAM_COUNT = 1000;
1083        final int BIGRAM_COUNT = 1000;
1084        final int codePointSetSize = 20;
1085        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1086
1087        File dictFile = null;
1088        try {
1089            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1090        } catch (IOException e) {
1091            fail("IOException while writing an initial dictionary : " + e);
1092        }
1093        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1094                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1095                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1096
1097        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
1098        assertFalse(invalidWordProperty.isValid());
1099
1100        final ArrayList<String> words = new ArrayList<>();
1101        final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>();
1102        final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
1103        final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
1104                new HashMap<>();
1105
1106        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1107            final String word = CodePointUtils.generateWord(random, codePointSet);
1108            final int unigramProbability = random.nextInt(0xFF);
1109            addUnigramWord(binaryDictionary, word, unigramProbability);
1110            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1111                binaryDictionary.flushWithGC();
1112            }
1113            words.add(word);
1114            wordProbabilitiesToCheckLater.put(word, unigramProbability);
1115        }
1116
1117        for (int i = 0; i < BIGRAM_COUNT; i++) {
1118            final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1119            final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1120            if (word0Index == word1Index) {
1121                continue;
1122            }
1123            final String word0 = words.get(word0Index);
1124            final String word1 = words.get(word1Index);
1125            final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
1126            final int bigramProbability =
1127                    unigramProbability + random.nextInt(0xFF - unigramProbability);
1128            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1129            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1130                binaryDictionary.flushWithGC();
1131            }
1132            if (!bigrams.containsKey(word0)) {
1133                final HashSet<String> bigramWord1s = new HashSet<>();
1134                bigrams.put(word0, bigramWord1s);
1135            }
1136            bigrams.get(word0).add(word1);
1137            bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability);
1138        }
1139
1140        final HashSet<String> wordSet = new HashSet<>(words);
1141        final HashSet<Pair<String, String>> bigramSet =
1142                new HashSet<>(bigramProbabilitiesToCheckLater.keySet());
1143        int token = 0;
1144        do {
1145            final BinaryDictionary.GetNextWordPropertyResult result =
1146                    binaryDictionary.getNextWordProperty(token);
1147            final WordProperty wordProperty = result.mWordProperty;
1148            final String word0 = wordProperty.mWord;
1149            assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
1150                    wordProperty.mProbabilityInfo.mProbability);
1151            wordSet.remove(word0);
1152            final HashSet<String> bigramWord1s = bigrams.get(word0);
1153            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
1154                final String word1 = wordProperty.mBigrams.get(j).mWord;
1155                assertTrue(bigramWord1s.contains(word1));
1156                final Pair<String, String> bigram = new Pair<>(word0, word1);
1157                if (canCheckBigramProbability(formatVersion)) {
1158                    final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
1159                    assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
1160                }
1161                bigramSet.remove(bigram);
1162            }
1163            token = result.mNextToken;
1164        } while (token != 0);
1165        assertTrue(wordSet.isEmpty());
1166        assertTrue(bigramSet.isEmpty());
1167    }
1168
1169    public void testAddShortcuts() {
1170        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1171            testAddShortcuts(formatVersion);
1172        }
1173    }
1174
1175    private void testAddShortcuts(final int formatVersion) {
1176        File dictFile = null;
1177        try {
1178            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1179        } catch (IOException e) {
1180            fail("IOException while writing an initial dictionary : " + e);
1181        }
1182        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1183                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1184                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1185
1186        final int unigramProbability = 100;
1187        final int shortcutProbability = 10;
1188        binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
1189                shortcutProbability, false /* isBeginningOfSentence */,
1190                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
1191        WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
1192        assertEquals(1, wordProperty.mShortcutTargets.size());
1193        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1194        assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
1195        final int updatedShortcutProbability = 2;
1196        binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
1197                updatedShortcutProbability, false /* isBeginningOfSentence */,
1198                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
1199        wordProperty = binaryDictionary.getWordProperty("aaa");
1200        assertEquals(1, wordProperty.mShortcutTargets.size());
1201        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1202        assertEquals(updatedShortcutProbability,
1203                wordProperty.mShortcutTargets.get(0).getProbability());
1204        binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
1205                shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
1206                false /* isBlacklisted */, 0 /* timestamp */);
1207        final HashMap<String, Integer> shortcutTargets = new HashMap<>();
1208        shortcutTargets.put("zzz", updatedShortcutProbability);
1209        shortcutTargets.put("yyy", shortcutProbability);
1210        wordProperty = binaryDictionary.getWordProperty("aaa");
1211        assertEquals(2, wordProperty.mShortcutTargets.size());
1212        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1213            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1214            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1215                    shortcutTarget.getProbability());
1216            shortcutTargets.remove(shortcutTarget.mWord);
1217        }
1218        shortcutTargets.put("zzz", updatedShortcutProbability);
1219        shortcutTargets.put("yyy", shortcutProbability);
1220        binaryDictionary.flushWithGC();
1221        wordProperty = binaryDictionary.getWordProperty("aaa");
1222        assertEquals(2, wordProperty.mShortcutTargets.size());
1223        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1224            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1225            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1226                    shortcutTarget.getProbability());
1227            shortcutTargets.remove(shortcutTarget.mWord);
1228        }
1229    }
1230
1231    public void testAddManyShortcuts() {
1232        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1233            testAddManyShortcuts(formatVersion);
1234        }
1235    }
1236
1237    private void testAddManyShortcuts(final int formatVersion) {
1238        final long seed = System.currentTimeMillis();
1239        final Random random = new Random(seed);
1240        final int UNIGRAM_COUNT = 1000;
1241        final int SHORTCUT_COUNT = 10000;
1242        final int codePointSetSize = 20;
1243        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1244
1245        final ArrayList<String> words = new ArrayList<>();
1246        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
1247        final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>();
1248
1249        File dictFile = null;
1250        try {
1251            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1252        } catch (IOException e) {
1253            fail("IOException while writing an initial dictionary : " + e);
1254        }
1255        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1256                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1257                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1258
1259        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1260            final String word = CodePointUtils.generateWord(random, codePointSet);
1261            final int unigramProbability = random.nextInt(0xFF);
1262            addUnigramWord(binaryDictionary, word, unigramProbability);
1263            words.add(word);
1264            unigramProbabilities.put(word, unigramProbability);
1265            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1266                binaryDictionary.flushWithGC();
1267            }
1268        }
1269        for (int i = 0; i < SHORTCUT_COUNT; i++) {
1270            final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
1271            final int shortcutProbability = random.nextInt(0xF);
1272            final String word = words.get(random.nextInt(words.size()));
1273            final int unigramProbability = unigramProbabilities.get(word);
1274            binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
1275                    shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
1276                    false /* isBlacklisted */, 0 /* timestamp */);
1277            if (shortcutTargets.containsKey(word)) {
1278                final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
1279                shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1280            } else {
1281                final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>();
1282                shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1283                shortcutTargets.put(word, shortcutTargetsOfWord);
1284            }
1285            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1286                binaryDictionary.flushWithGC();
1287            }
1288        }
1289
1290        for (final String word : words) {
1291            final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
1292            assertEquals((int)unigramProbabilities.get(word),
1293                    wordProperty.mProbabilityInfo.mProbability);
1294            if (!shortcutTargets.containsKey(word)) {
1295                // The word does not have shortcut targets.
1296                continue;
1297            }
1298            assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
1299            for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1300                final String targetCodePonts = shortcutTarget.mWord;
1301                assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
1302                        shortcutTarget.getProbability());
1303            }
1304        }
1305    }
1306
1307    public void testDictMigration() {
1308        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1309            testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
1310        }
1311    }
1312
1313    private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
1314        File dictFile = null;
1315        try {
1316            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
1317        } catch (IOException e) {
1318            fail("IOException while writing an initial dictionary : " + e);
1319        }
1320        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1321                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1322                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1323        final int unigramProbability = 100;
1324        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
1325        addUnigramWord(binaryDictionary, "bbb", unigramProbability);
1326        final int bigramProbability = 150;
1327        addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
1328        final int shortcutProbability = 10;
1329        binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
1330                false /* isBeginningOfSentence */, false /* isNotAWord */,
1331                false /* isBlacklisted */, 0 /* timestamp */);
1332        binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
1333                Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
1334                true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
1335        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
1336        assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
1337        assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
1338        assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
1339        assertTrue(binaryDictionary.migrateTo(toFormatVersion));
1340        assertTrue(binaryDictionary.isValidDictionary());
1341        assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
1342        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
1343        assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
1344        if (canCheckBigramProbability(toFormatVersion)) {
1345            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
1346        }
1347        assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
1348        WordProperty wordProperty = binaryDictionary.getWordProperty("ccc");
1349        assertEquals(1, wordProperty.mShortcutTargets.size());
1350        assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord);
1351        wordProperty = binaryDictionary.getWordProperty("ddd");
1352        assertTrue(wordProperty.mIsBlacklistEntry);
1353        assertTrue(wordProperty.mIsNotAWord);
1354    }
1355
1356    public void testLargeDictMigration() {
1357        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1358            testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
1359        }
1360    }
1361
1362    private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) {
1363        final int UNIGRAM_COUNT = 3000;
1364        final int BIGRAM_COUNT = 3000;
1365        final int codePointSetSize = 50;
1366        final long seed = System.currentTimeMillis();
1367        final Random random = new Random(seed);
1368
1369        File dictFile = null;
1370        try {
1371            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
1372        } catch (IOException e) {
1373            fail("IOException while writing an initial dictionary : " + e);
1374        }
1375        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1376                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1377                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1378
1379        final ArrayList<String> words = new ArrayList<>();
1380        final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
1381        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1382        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
1383        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
1384
1385        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1386            final String word = CodePointUtils.generateWord(random, codePointSet);
1387            final int unigramProbability = random.nextInt(0xFF);
1388            addUnigramWord(binaryDictionary, word, unigramProbability);
1389            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1390                binaryDictionary.flushWithGC();
1391            }
1392            words.add(word);
1393            unigramProbabilities.put(word, unigramProbability);
1394        }
1395
1396        for (int i = 0; i < BIGRAM_COUNT; i++) {
1397            final int word0Index = random.nextInt(words.size());
1398            final int word1Index = random.nextInt(words.size());
1399            if (word0Index == word1Index) {
1400                continue;
1401            }
1402            final String word0 = words.get(word0Index);
1403            final String word1 = words.get(word1Index);
1404            final int unigramProbability = unigramProbabilities.get(word1);
1405            final int bigramProbability =
1406                    random.nextInt(0xFF - unigramProbability) + unigramProbability;
1407            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1408            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1409                binaryDictionary.flushWithGC();
1410            }
1411            final Pair<String, String> bigram = new Pair<>(word0, word1);
1412            bigrams.add(bigram);
1413            bigramProbabilities.put(bigram, bigramProbability);
1414        }
1415        assertTrue(binaryDictionary.migrateTo(toFormatVersion));
1416
1417        for (final String word : words) {
1418            assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
1419        }
1420        assertEquals(unigramProbabilities.size(), Integer.parseInt(
1421                binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
1422
1423        for (final Pair<String, String> bigram : bigrams) {
1424            if (canCheckBigramProbability(toFormatVersion)) {
1425                assertEquals((int)bigramProbabilities.get(bigram),
1426                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
1427            }
1428            assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
1429        }
1430        assertEquals(bigramProbabilities.size(), Integer.parseInt(
1431                binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
1432    }
1433
1434    public void testBeginningOfSentence() {
1435        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1436            if (supportsBeginningOfSentence(formatVersion)) {
1437                testBeginningOfSentence(formatVersion);
1438            }
1439        }
1440    }
1441
1442    private void testBeginningOfSentence(final int formatVersion) {
1443        File dictFile = null;
1444        try {
1445            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1446        } catch (IOException e) {
1447            fail("IOException while writing an initial dictionary : " + e);
1448        }
1449        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1450                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1451                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1452        final int dummyProbability = 0;
1453        binaryDictionary.addUnigramEntry("", dummyProbability, "" /* shortcutTarget */,
1454                BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
1455                true /* isBeginningOfSentence */, true /* isNotAWord */, false /* isBlacklisted */,
1456                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1457        final PrevWordsInfo prevWordsInfoStartOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
1458        final int bigramProbability = 200;
1459        addUnigramWord(binaryDictionary, "aaa", dummyProbability);
1460        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability,
1461                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1462        assertEquals(bigramProbability,
1463                binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa"));
1464        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "aaa", bigramProbability,
1465                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1466        addUnigramWord(binaryDictionary, "bbb", dummyProbability);
1467        binaryDictionary.addNgramEntry(prevWordsInfoStartOfSentence, "bbb", bigramProbability,
1468                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1469        binaryDictionary.flushWithGC();
1470        assertEquals(bigramProbability,
1471                binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "aaa"));
1472        assertEquals(bigramProbability,
1473                binaryDictionary.getNgramProbability(prevWordsInfoStartOfSentence, "bbb"));
1474    }
1475
1476    public void testGetMaxFrequencyOfExactMatches() {
1477        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1478            testGetMaxFrequencyOfExactMatches(formatVersion);
1479        }
1480    }
1481
1482    private void testGetMaxFrequencyOfExactMatches(final int formatVersion) {
1483        File dictFile = null;
1484        try {
1485            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1486        } catch (IOException e) {
1487            fail("IOException while writing an initial dictionary : " + e);
1488        }
1489        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1490                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1491                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1492        addUnigramWord(binaryDictionary, "abc", 10);
1493        addUnigramWord(binaryDictionary, "aBc", 15);
1494        assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1495        addUnigramWord(binaryDictionary, "ab'c", 20);
1496        assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1497        addUnigramWord(binaryDictionary, "a-b-c", 25);
1498        assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1499        addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30);
1500        assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1501        addUnigramWord(binaryDictionary, "ab c", 255);
1502        assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1503    }
1504}
1505