1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.test.AndroidTestCase;
20import android.test.suitebuilder.annotation.LargeTest;
21import android.text.TextUtils;
22import android.util.Pair;
23
24import com.android.inputmethod.latin.PrevWordsInfo.WordInfo;
25import com.android.inputmethod.latin.makedict.CodePointUtils;
26import com.android.inputmethod.latin.makedict.FormatSpec;
27import com.android.inputmethod.latin.makedict.WeightedString;
28import com.android.inputmethod.latin.makedict.WordProperty;
29import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
30import com.android.inputmethod.latin.utils.FileUtils;
31import com.android.inputmethod.latin.utils.LanguageModelParam;
32
33import java.io.File;
34import java.io.IOException;
35import java.util.ArrayList;
36import java.util.HashMap;
37import java.util.HashSet;
38import java.util.Locale;
39import java.util.Map;
40import java.util.Random;
41
42// TODO Use the seed passed as an argument for makedict test.
43@LargeTest
44public class BinaryDictionaryTests extends AndroidTestCase {
    // Suffix used for the temporary dictionary paths created by these tests.
    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
    // Passed to every BinaryDictionary constructor call in this file, right after the Locale
    // argument. NOTE(review): despite the name, a real Locale is passed separately; confirm
    // what this argument means in BinaryDictionary.
    private static final String TEST_LOCALE = "test";
    // Format versions under test; each public test method runs once per entry.
    private static final int[] DICT_FORMAT_VERSIONS =
            new int[] { FormatSpec.VERSION4, FormatSpec.VERSION4_DEV };
49
50    private static boolean canCheckBigramProbability(final int formatVersion) {
51        return formatVersion > FormatSpec.VERSION401;
52    }
53
54    private static boolean supportsBeginningOfSentence(final int formatVersion) {
55        return formatVersion > FormatSpec.VERSION401;
56    }
57
58    private File createEmptyDictionaryAndGetFile(final String dictId,
59            final int formatVersion) throws IOException {
60        if (formatVersion == FormatSpec.VERSION4
61                || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING
62                || formatVersion == FormatSpec.VERSION4_DEV) {
63            return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion);
64        } else {
65            throw new IOException("Dictionary format version " + formatVersion
66                    + " is not supported.");
67        }
68    }
69
70    private File createEmptyVer4DictionaryAndGetFile(final String dictId,
71            final int formatVersion) throws IOException {
72        final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
73                getContext().getCacheDir());
74        file.delete();
75        file.mkdir();
76        Map<String, String> attributeMap = new HashMap<>();
77        if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion,
78                Locale.ENGLISH, attributeMap)) {
79            return file;
80        } else {
81            throw new IOException("Empty dictionary " + file.getAbsolutePath()
82                    + " cannot be created. Format version: " + formatVersion);
83        }
84    }
85
86    public void testIsValidDictionary() {
87        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
88            testIsValidDictionary(formatVersion);
89        }
90    }
91
92    private void testIsValidDictionary(final int formatVersion) {
93        File dictFile = null;
94        try {
95            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
96        } catch (IOException e) {
97            fail("IOException while writing an initial dictionary : " + e);
98        }
99        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
100                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
101                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
102        assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
103                binaryDictionary.isValidDictionary());
104        binaryDictionary.close();
105        assertFalse("binaryDictionary must be invalid after closing.",
106                binaryDictionary.isValidDictionary());
107        FileUtils.deleteRecursively(dictFile);
108        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
109                dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
110                TEST_LOCALE, true /* isUpdatable */);
111        assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
112                binaryDictionary.isValidDictionary());
113        binaryDictionary.close();
114    }
115
116    public void testConstructingDictionaryOnMemory() {
117        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
118            testConstructingDictionaryOnMemory(formatVersion);
119        }
120    }
121
122    private void testConstructingDictionaryOnMemory(final int formatVersion) {
123        File dictFile = null;
124        try {
125            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
126        } catch (IOException e) {
127            fail("IOException while writing an initial dictionary : " + e);
128        }
129        FileUtils.deleteRecursively(dictFile);
130        assertFalse(dictFile.exists());
131        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
132                true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
133                new HashMap<String, String>());
134        assertTrue(binaryDictionary.isValidDictionary());
135        assertEquals(formatVersion, binaryDictionary.getFormatVersion());
136        final int probability = 100;
137        addUnigramWord(binaryDictionary, "word", probability);
138        assertEquals(probability, binaryDictionary.getFrequency("word"));
139        assertFalse(dictFile.exists());
140        binaryDictionary.flush();
141        assertTrue(dictFile.exists());
142        assertTrue(binaryDictionary.isValidDictionary());
143        assertEquals(formatVersion, binaryDictionary.getFormatVersion());
144        assertEquals(probability, binaryDictionary.getFrequency("word"));
145        binaryDictionary.close();
146        dictFile.delete();
147    }
148
149    public void testAddTooLongWord() {
150        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
151            testAddTooLongWord(formatVersion);
152        }
153    }
154
155    private void testAddTooLongWord(final int formatVersion) {
156        File dictFile = null;
157        try {
158            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
159        } catch (IOException e) {
160            fail("IOException while writing an initial dictionary : " + e);
161        }
162        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
163                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
164                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
165
166        final StringBuffer stringBuilder = new StringBuffer();
167        for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
168            stringBuilder.append('a');
169        }
170        final String validLongWord = stringBuilder.toString();
171        stringBuilder.append('a');
172        final String invalidLongWord = stringBuilder.toString();
173        final int probability = 100;
174        addUnigramWord(binaryDictionary, "aaa", probability);
175        addUnigramWord(binaryDictionary, validLongWord, probability);
176        addUnigramWord(binaryDictionary, invalidLongWord, probability);
177        // Too long short cut.
178        binaryDictionary.addUnigramEntry("a", probability, invalidLongWord,
179                10 /* shortcutProbability */, false /* isBeginningOfSentence */,
180                false /* isNotAWord */, false /* isBlacklisted */,
181                BinaryDictionary.NOT_A_VALID_TIMESTAMP);
182        addUnigramWord(binaryDictionary, "abc", probability);
183        final int updatedProbability = 200;
184        // Update.
185        addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
186        addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
187        addUnigramWord(binaryDictionary, "abc", updatedProbability);
188
189        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
190        assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
191        assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
192                binaryDictionary.getFrequency(invalidLongWord));
193        assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
194        dictFile.delete();
195    }
196
197    private static void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
198            final int probability) {
199        binaryDictionary.addUnigramEntry(word, probability, "" /* shortcutTarget */,
200                BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
201                false /* isBeginningOfSentence */, false /* isNotAWord */,
202                false /* isBlacklisted */, BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
203    }
204
205    private static void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
206            final String word1, final int probability) {
207        binaryDictionary.addNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1, probability,
208                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
209    }
210
211    private static boolean isValidBigram(final BinaryDictionary binaryDictionary,
212            final String word0, final String word1) {
213        return binaryDictionary.isValidNgram(new PrevWordsInfo(new WordInfo(word0)), word1);
214    }
215
216    private static void removeBigramEntry(final BinaryDictionary binaryDictionary,
217            final String word0, final String word1) {
218        binaryDictionary.removeNgramEntry(new PrevWordsInfo(new WordInfo(word0)), word1);
219    }
220
221    private static int getBigramProbability(final BinaryDictionary binaryDictionary,
222            final String word0,  final String word1) {
223        return binaryDictionary.getNgramProbability(new PrevWordsInfo(new WordInfo(word0)), word1);
224    }
225
226    public void testAddUnigramWord() {
227        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
228            testAddUnigramWord(formatVersion);
229        }
230    }
231
232    private void testAddUnigramWord(final int formatVersion) {
233        File dictFile = null;
234        try {
235            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
236        } catch (IOException e) {
237            fail("IOException while writing an initial dictionary : " + e);
238        }
239        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
240                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
241                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
242
243        final int probability = 100;
244        addUnigramWord(binaryDictionary, "aaa", probability);
245        // Reallocate and create.
246        addUnigramWord(binaryDictionary, "aab", probability);
247        // Insert into children.
248        addUnigramWord(binaryDictionary, "aac", probability);
249        // Make terminal.
250        addUnigramWord(binaryDictionary, "aa", probability);
251        // Create children.
252        addUnigramWord(binaryDictionary, "aaaa", probability);
253        // Reallocate and make termianl.
254        addUnigramWord(binaryDictionary, "a", probability);
255
256        final int updatedProbability = 200;
257        // Update.
258        addUnigramWord(binaryDictionary, "aaa", updatedProbability);
259
260        assertEquals(probability, binaryDictionary.getFrequency("aab"));
261        assertEquals(probability, binaryDictionary.getFrequency("aac"));
262        assertEquals(probability, binaryDictionary.getFrequency("aa"));
263        assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
264        assertEquals(probability, binaryDictionary.getFrequency("a"));
265        assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
266
267        dictFile.delete();
268    }
269
270    public void testRandomlyAddUnigramWord() {
271        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
272            testRandomlyAddUnigramWord(formatVersion);
273        }
274    }
275
276    private void testRandomlyAddUnigramWord(final int formatVersion) {
277        final int wordCount = 1000;
278        final int codePointSetSize = 50;
279        final long seed = System.currentTimeMillis();
280
281        File dictFile = null;
282        try {
283            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
284        } catch (IOException e) {
285            fail("IOException while writing an initial dictionary : " + e);
286        }
287        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
288                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
289                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
290
291        final HashMap<String, Integer> probabilityMap = new HashMap<>();
292        // Test a word that isn't contained within the dictionary.
293        final Random random = new Random(seed);
294        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
295        for (int i = 0; i < wordCount; ++i) {
296            final String word = CodePointUtils.generateWord(random, codePointSet);
297            probabilityMap.put(word, random.nextInt(0xFF));
298        }
299        for (String word : probabilityMap.keySet()) {
300            addUnigramWord(binaryDictionary, word, probabilityMap.get(word));
301        }
302        for (String word : probabilityMap.keySet()) {
303            assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
304        }
305        dictFile.delete();
306    }
307
308    public void testAddBigramWords() {
309        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
310            testAddBigramWords(formatVersion);
311        }
312    }
313
314    private void testAddBigramWords(final int formatVersion) {
315        File dictFile = null;
316        try {
317            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
318        } catch (IOException e) {
319            fail("IOException while writing an initial dictionary : " + e);
320        }
321        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
322                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
323                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
324
325        final int unigramProbability = 100;
326        final int bigramProbability = 150;
327        final int updatedBigramProbability = 200;
328        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
329        addUnigramWord(binaryDictionary, "abb", unigramProbability);
330        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
331        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
332        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
333        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
334        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
335
336        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
337        assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
338        assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
339        assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
340        if (canCheckBigramProbability(formatVersion)) {
341            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
342            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
343            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
344            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
345        }
346
347        addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
348        if (canCheckBigramProbability(formatVersion)) {
349            assertEquals(updatedBigramProbability,
350                    getBigramProbability(binaryDictionary, "aaa", "abb"));
351        }
352
353        assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
354        assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
355        assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
356        assertEquals(Dictionary.NOT_A_PROBABILITY,
357                getBigramProbability(binaryDictionary, "bcc", "aaa"));
358        assertEquals(Dictionary.NOT_A_PROBABILITY,
359                getBigramProbability(binaryDictionary, "bcc", "bbc"));
360        assertEquals(Dictionary.NOT_A_PROBABILITY,
361                getBigramProbability(binaryDictionary, "aaa", "aaa"));
362
363        // Testing bigram link.
364        addUnigramWord(binaryDictionary, "abcde", unigramProbability);
365        addUnigramWord(binaryDictionary, "fghij", unigramProbability);
366        addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability);
367        addUnigramWord(binaryDictionary, "fgh", unigramProbability);
368        addUnigramWord(binaryDictionary, "abc", unigramProbability);
369        addUnigramWord(binaryDictionary, "f", unigramProbability);
370
371        if (canCheckBigramProbability(formatVersion)) {
372            assertEquals(bigramProbability,
373                    getBigramProbability(binaryDictionary, "abcde", "fghij"));
374        }
375        assertEquals(Dictionary.NOT_A_PROBABILITY,
376                getBigramProbability(binaryDictionary, "abcde", "fgh"));
377        addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
378        if (canCheckBigramProbability(formatVersion)) {
379            assertEquals(updatedBigramProbability,
380                    getBigramProbability(binaryDictionary, "abcde", "fghij"));
381        }
382
383        dictFile.delete();
384    }
385
386    public void testRandomlyAddBigramWords() {
387        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
388            testRandomlyAddBigramWords(formatVersion);
389        }
390    }
391
392    private void testRandomlyAddBigramWords(final int formatVersion) {
393        final int wordCount = 100;
394        final int bigramCount = 1000;
395        final int codePointSetSize = 50;
396        final long seed = System.currentTimeMillis();
397        final Random random = new Random(seed);
398
399        File dictFile = null;
400        try {
401            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
402        } catch (IOException e) {
403            fail("IOException while writing an initial dictionary : " + e);
404        }
405        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
406                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
407                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
408
409        final ArrayList<String> words = new ArrayList<>();
410        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
411        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
412        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
413        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
414
415        for (int i = 0; i < wordCount; ++i) {
416            final String word = CodePointUtils.generateWord(random, codePointSet);
417            words.add(word);
418            final int unigramProbability = random.nextInt(0xFF);
419            unigramProbabilities.put(word, unigramProbability);
420            addUnigramWord(binaryDictionary, word, unigramProbability);
421        }
422
423        for (int i = 0; i < bigramCount; i++) {
424            final String word0 = words.get(random.nextInt(wordCount));
425            final String word1 = words.get(random.nextInt(wordCount));
426            if (TextUtils.equals(word0, word1)) {
427                continue;
428            }
429            final Pair<String, String> bigram = new Pair<>(word0, word1);
430            bigramWords.add(bigram);
431            final int unigramProbability = unigramProbabilities.get(word1);
432            final int bigramProbability =
433                    unigramProbability + random.nextInt(0xFF - unigramProbability);
434            bigramProbabilities.put(bigram, bigramProbability);
435            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
436        }
437
438        for (final Pair<String, String> bigram : bigramWords) {
439            final int bigramProbability = bigramProbabilities.get(bigram);
440            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
441                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
442            if (canCheckBigramProbability(formatVersion)) {
443                assertEquals(bigramProbability,
444                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
445            }
446        }
447
448        dictFile.delete();
449    }
450
451    public void testRemoveBigramWords() {
452        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
453            testRemoveBigramWords(formatVersion);
454        }
455    }
456
457    private void testRemoveBigramWords(final int formatVersion) {
458        File dictFile = null;
459        try {
460            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
461        } catch (IOException e) {
462            fail("IOException while writing an initial dictionary : " + e);
463        }
464        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
465                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
466                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
467        final int unigramProbability = 100;
468        final int bigramProbability = 150;
469        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
470        addUnigramWord(binaryDictionary, "abb", unigramProbability);
471        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
472        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
473        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
474        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
475        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
476
477        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
478        assertTrue(isValidBigram(binaryDictionary, "aaa", "bcc"));
479        assertTrue(isValidBigram(binaryDictionary, "abb", "aaa"));
480        assertTrue(isValidBigram(binaryDictionary, "abb", "bcc"));
481
482        removeBigramEntry(binaryDictionary, "aaa", "abb");
483        assertFalse(isValidBigram(binaryDictionary, "aaa", "abb"));
484        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
485        assertTrue(isValidBigram(binaryDictionary, "aaa", "abb"));
486
487
488        removeBigramEntry(binaryDictionary, "aaa", "bcc");
489        assertFalse(isValidBigram(binaryDictionary, "aaa", "bcc"));
490        removeBigramEntry(binaryDictionary, "abb", "aaa");
491        assertFalse(isValidBigram(binaryDictionary, "abb", "aaa"));
492        removeBigramEntry(binaryDictionary, "abb", "bcc");
493        assertFalse(isValidBigram(binaryDictionary, "abb", "bcc"));
494
495        removeBigramEntry(binaryDictionary, "aaa", "abb");
496        // Test remove non-existing bigram operation.
497        removeBigramEntry(binaryDictionary, "aaa", "abb");
498        removeBigramEntry(binaryDictionary, "bcc", "aaa");
499
500        dictFile.delete();
501    }
502
503    public void testFlushDictionary() {
504        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
505            testFlushDictionary(formatVersion);
506        }
507    }
508
509    private void testFlushDictionary(final int formatVersion) {
510        File dictFile = null;
511        try {
512            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
513        } catch (IOException e) {
514            fail("IOException while writing an initial dictionary : " + e);
515        }
516        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
517                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
518                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
519
520        final int probability = 100;
521        addUnigramWord(binaryDictionary, "aaa", probability);
522        addUnigramWord(binaryDictionary, "abcd", probability);
523        // Close without flushing.
524        binaryDictionary.close();
525
526        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
527                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
528                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
529
530        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
531        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
532
533        addUnigramWord(binaryDictionary, "aaa", probability);
534        addUnigramWord(binaryDictionary, "abcd", probability);
535        binaryDictionary.flush();
536        binaryDictionary.close();
537
538        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
539                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
540                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
541
542        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
543        assertEquals(probability, binaryDictionary.getFrequency("abcd"));
544        addUnigramWord(binaryDictionary, "bcde", probability);
545        binaryDictionary.flush();
546        binaryDictionary.close();
547
548        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
549                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
550                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
551        assertEquals(probability, binaryDictionary.getFrequency("bcde"));
552        binaryDictionary.close();
553
554        dictFile.delete();
555    }
556
557    public void testFlushWithGCDictionary() {
558        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
559            testFlushWithGCDictionary(formatVersion);
560        }
561    }
562
563    private void testFlushWithGCDictionary(final int formatVersion) {
564        File dictFile = null;
565        try {
566            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
567        } catch (IOException e) {
568            fail("IOException while writing an initial dictionary : " + e);
569        }
570        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
571                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
572                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
573
574        final int unigramProbability = 100;
575        final int bigramProbability = 150;
576        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
577        addUnigramWord(binaryDictionary, "abb", unigramProbability);
578        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
579        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
580        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
581        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
582        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
583        binaryDictionary.flushWithGC();
584        binaryDictionary.close();
585
586        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
587                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
588                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
589        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
590        assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
591        assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
592        if (canCheckBigramProbability(formatVersion)) {
593            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "abb"));
594            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bcc"));
595            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "aaa"));
596            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "abb", "bcc"));
597        }
598        assertFalse(isValidBigram(binaryDictionary, "bcc", "aaa"));
599        assertFalse(isValidBigram(binaryDictionary, "bcc", "bbc"));
600        assertFalse(isValidBigram(binaryDictionary, "aaa", "aaa"));
601        binaryDictionary.flushWithGC();
602        binaryDictionary.close();
603
604        dictFile.delete();
605    }
606
607    public void testAddBigramWordsAndFlashWithGC() {
608        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
609            testAddBigramWordsAndFlashWithGC(formatVersion);
610        }
611    }
612
613    // TODO: Evaluate performance of GC
614    private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
615        final int wordCount = 100;
616        final int bigramCount = 1000;
617        final int codePointSetSize = 30;
618        final long seed = System.currentTimeMillis();
619        final Random random = new Random(seed);
620
621        File dictFile = null;
622        try {
623            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
624        } catch (IOException e) {
625            fail("IOException while writing an initial dictionary : " + e);
626        }
627
628        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
629                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
630                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
631
632        final ArrayList<String> words = new ArrayList<>();
633        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
634        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
635        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
636        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
637
638        for (int i = 0; i < wordCount; ++i) {
639            final String word = CodePointUtils.generateWord(random, codePointSet);
640            words.add(word);
641            final int unigramProbability = random.nextInt(0xFF);
642            unigramProbabilities.put(word, unigramProbability);
643            addUnigramWord(binaryDictionary, word, unigramProbability);
644        }
645
646        for (int i = 0; i < bigramCount; i++) {
647            final String word0 = words.get(random.nextInt(wordCount));
648            final String word1 = words.get(random.nextInt(wordCount));
649            if (TextUtils.equals(word0, word1)) {
650                continue;
651            }
652            final Pair<String, String> bigram = new Pair<>(word0, word1);
653            bigramWords.add(bigram);
654            final int unigramProbability = unigramProbabilities.get(word1);
655            final int bigramProbability =
656                    unigramProbability + random.nextInt(0xFF - unigramProbability);
657            bigramProbabilities.put(bigram, bigramProbability);
658            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
659        }
660
661        binaryDictionary.flushWithGC();
662        binaryDictionary.close();
663        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
664                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
665                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
666
667
668        for (final Pair<String, String> bigram : bigramWords) {
669            final int bigramProbability = bigramProbabilities.get(bigram);
670            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
671                    isValidBigram(binaryDictionary, bigram.first, bigram.second));
672            if (canCheckBigramProbability(formatVersion)) {
673                assertEquals(bigramProbability,
674                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
675            }
676        }
677
678        dictFile.delete();
679    }
680
681    public void testRandomOperationsAndFlashWithGC() {
682        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
683            testRandomOperationsAndFlashWithGC(formatVersion);
684        }
685    }
686
    /**
     * Applies a random mix of unigram additions, bigram additions and bigram removals to an
     * updatable dictionary. In every iteration the dictionary is reopened, modified, compared
     * against the expected state tracked in local collections, and then flushed with GC.
     *
     * @param formatVersion the dictionary format version used to create the test dictionary.
     */
    private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
        final int flashWithGCIterationCount = 50;
        final int operationCountInEachIteration = 200;
        final int initialUnigramCount = 100;
        // Per-operation chances of performing each kind of modification.
        final float addUnigramProb = 0.5f;
        final float addBigramProb = 0.8f;
        final float removeBigramProb = 0.2f;
        final int codePointSetSize = 30;

        final long seed = System.currentTimeMillis();
        final Random random = new Random(seed);

        File dictFile = null;
        try {
            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
        } catch (IOException e) {
            fail("IOException while writing an initial dictionary : " + e);
        }

        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
        // Expected state: every word/bigram added so far and the probability last set for it.
        final ArrayList<String> words = new ArrayList<>();
        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<>();
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
        // Seed the dictionary with an initial set of unigrams.
        for (int i = 0; i < initialUnigramCount; ++i) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            words.add(word);
            final int unigramProbability = random.nextInt(0xFF);
            unigramProbabilities.put(word, unigramProbability);
            addUnigramWord(binaryDictionary, word, unigramProbability);
        }
        binaryDictionary.flushWithGC();
        binaryDictionary.close();

        for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
            // Reopen the file written by the previous flushWithGC().
            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
            for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
                // Add unigram.
                if (random.nextFloat() < addUnigramProb) {
                    final String word = CodePointUtils.generateWord(random, codePointSet);
                    words.add(word);
                    final int unigramProbability = random.nextInt(0xFF);
                    unigramProbabilities.put(word, unigramProbability);
                    addUnigramWord(binaryDictionary, word, unigramProbability);
                }
                // Add bigram.
                if (random.nextFloat() < addBigramProb && words.size() > 2) {
                    final int word0Index = random.nextInt(words.size());
                    // Draw from the remaining indices so word1Index always differs from
                    // word0Index.
                    int word1Index = random.nextInt(words.size() - 1);
                    if (word0Index <= word1Index) {
                        word1Index++;
                    }
                    final String word0 = words.get(word0Index);
                    final String word1 = words.get(word1Index);
                    // Nothing here deduplicates words, so two distinct indices may still hold
                    // equal strings. In that case the rest of this operation, including the
                    // removal step below, is skipped.
                    if (TextUtils.equals(word0, word1)) {
                        continue;
                    }
                    final int unigramProbability = unigramProbabilities.get(word1);
                    // At least word1's unigram probability and below 0xFF.
                    final int bigramProbability =
                            unigramProbability + random.nextInt(0xFF - unigramProbability);
                    final Pair<String, String> bigram = new Pair<>(word0, word1);
                    bigramWords.add(bigram);
                    bigramProbabilities.put(bigram, bigramProbability);
                    addBigramWords(binaryDictionary, word0, word1, bigramProbability);
                }
                // Remove bigram.
                if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
                    final int bigramIndex = random.nextInt(bigramWords.size());
                    final Pair<String, String> bigram = bigramWords.get(bigramIndex);
                    // Only one list entry is removed; if the same pair was added more than
                    // once, a copy stays in bigramWords. The verification below treats a pair
                    // without an entry in bigramProbabilities as removed.
                    bigramWords.remove(bigramIndex);
                    bigramProbabilities.remove(bigram);
                    removeBigramEntry(binaryDictionary, bigram.first, bigram.second);
                }
            }

            // Check that all unigram operations were handled correctly.
            for (int i = 0; i < words.size(); i++) {
                final String word = words.get(i);
                final int unigramProbability = unigramProbabilities.get(word);
                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
            }
            // Check that all bigram operations were handled correctly.
            for (int i = 0; i < bigramWords.size(); i++) {
                final Pair<String, String> bigram = bigramWords.get(i);
                final int probability;
                if (bigramProbabilities.containsKey(bigram)) {
                    final int bigramProbability = bigramProbabilities.get(bigram);
                    probability = bigramProbability;
                } else {
                    // The pair was removed after this list entry was recorded.
                    probability = Dictionary.NOT_A_PROBABILITY;
                }

                if (canCheckBigramProbability(formatVersion)) {
                    assertEquals(probability,
                            getBigramProbability(binaryDictionary, bigram.first, bigram.second));
                }
                assertEquals(probability != Dictionary.NOT_A_PROBABILITY,
                        isValidBigram(binaryDictionary, bigram.first, bigram.second));
            }
            binaryDictionary.flushWithGC();
            binaryDictionary.close();
        }

        dictFile.delete();
    }
797
798    public void testAddManyUnigramsAndFlushWithGC() {
799        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
800            testAddManyUnigramsAndFlushWithGC(formatVersion);
801        }
802    }
803
804    private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
805        final int flashWithGCIterationCount = 3;
806        final int codePointSetSize = 50;
807
808        final long seed = System.currentTimeMillis();
809        final Random random = new Random(seed);
810
811        File dictFile = null;
812        try {
813            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
814        } catch (IOException e) {
815            fail("IOException while writing an initial dictionary : " + e);
816        }
817
818        final ArrayList<String> words = new ArrayList<>();
819        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
820        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
821
822        BinaryDictionary binaryDictionary;
823        for (int i = 0; i < flashWithGCIterationCount; i++) {
824            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
825                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
826                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
827            while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
828                final String word = CodePointUtils.generateWord(random, codePointSet);
829                words.add(word);
830                final int unigramProbability = random.nextInt(0xFF);
831                unigramProbabilities.put(word, unigramProbability);
832                addUnigramWord(binaryDictionary, word, unigramProbability);
833            }
834
835            for (int j = 0; j < words.size(); j++) {
836                final String word = words.get(j);
837                final int unigramProbability = unigramProbabilities.get(word);
838                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
839            }
840
841            binaryDictionary.flushWithGC();
842            binaryDictionary.close();
843        }
844
845        dictFile.delete();
846    }
847
848    public void testUnigramAndBigramCount() {
849        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
850            testUnigramAndBigramCount(formatVersion);
851        }
852    }
853
854    private void testUnigramAndBigramCount(final int formatVersion) {
855        final int flashWithGCIterationCount = 10;
856        final int codePointSetSize = 50;
857        final int unigramCountPerIteration = 1000;
858        final int bigramCountPerIteration = 2000;
859        final long seed = System.currentTimeMillis();
860        final Random random = new Random(seed);
861
862        File dictFile = null;
863        try {
864            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
865        } catch (IOException e) {
866            fail("IOException while writing an initial dictionary : " + e);
867        }
868
869        final ArrayList<String> words = new ArrayList<>();
870        final HashSet<Pair<String, String>> bigrams = new HashSet<>();
871        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
872
873        BinaryDictionary binaryDictionary;
874        for (int i = 0; i < flashWithGCIterationCount; i++) {
875            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
876                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
877                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
878            for (int j = 0; j < unigramCountPerIteration; j++) {
879                final String word = CodePointUtils.generateWord(random, codePointSet);
880                words.add(word);
881                final int unigramProbability = random.nextInt(0xFF);
882                addUnigramWord(binaryDictionary, word, unigramProbability);
883            }
884            for (int j = 0; j < bigramCountPerIteration; j++) {
885                final String word0 = words.get(random.nextInt(words.size()));
886                final String word1 = words.get(random.nextInt(words.size()));
887                if (TextUtils.equals(word0, word1)) {
888                    continue;
889                }
890                bigrams.add(new Pair<>(word0, word1));
891                final int bigramProbability = random.nextInt(0xF);
892                addBigramWords(binaryDictionary, word0, word1, bigramProbability);
893            }
894            assertEquals(new HashSet<>(words).size(), Integer.parseInt(
895                    binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
896            assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
897                    binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
898            binaryDictionary.flushWithGC();
899            assertEquals(new HashSet<>(words).size(), Integer.parseInt(
900                    binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
901            assertEquals(new HashSet<>(bigrams).size(), Integer.parseInt(
902                    binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
903            binaryDictionary.close();
904        }
905
906        dictFile.delete();
907    }
908
909    public void testAddMultipleDictionaryEntries() {
910        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
911            testAddMultipleDictionaryEntries(formatVersion);
912        }
913    }
914
915    private void testAddMultipleDictionaryEntries(final int formatVersion) {
916        final int codePointSetSize = 20;
917        final int lmParamCount = 1000;
918        final double bigramContinueRate = 0.9;
919        final long seed = System.currentTimeMillis();
920        final Random random = new Random(seed);
921
922        File dictFile = null;
923        try {
924            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
925        } catch (IOException e) {
926            fail("IOException while writing an initial dictionary : " + e);
927        }
928
929        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
930        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
931        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
932
933        final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
934        String prevWord = null;
935        for (int i = 0; i < languageModelParams.length; i++) {
936            final String word = CodePointUtils.generateWord(random, codePointSet);
937            final int probability = random.nextInt(0xFF);
938            final int bigramProbability = probability + random.nextInt(0xFF - probability);
939            unigramProbabilities.put(word, probability);
940            if (prevWord == null) {
941                languageModelParams[i] = new LanguageModelParam(word, probability,
942                        BinaryDictionary.NOT_A_VALID_TIMESTAMP);
943            } else {
944                languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
945                        bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
946                bigramProbabilities.put(new Pair<>(prevWord, word),
947                        bigramProbability);
948            }
949            prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
950        }
951
952        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
953                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
954                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
955        binaryDictionary.addMultipleDictionaryEntries(languageModelParams);
956
957        for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
958            assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey()));
959        }
960
961        for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
962            final String word0 = entry.getKey().first;
963            final String word1 = entry.getKey().second;
964            final int bigramProbability = entry.getValue();
965            assertEquals(bigramProbability != Dictionary.NOT_A_PROBABILITY,
966                    isValidBigram(binaryDictionary, word0, word1));
967            if (canCheckBigramProbability(formatVersion)) {
968                assertEquals(bigramProbability,
969                        getBigramProbability(binaryDictionary, word0, word1));
970            }
971        }
972    }
973
974    public void testGetWordProperties() {
975        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
976            testGetWordProperties(formatVersion);
977        }
978    }
979
980    private void testGetWordProperties(final int formatVersion) {
981        final long seed = System.currentTimeMillis();
982        final Random random = new Random(seed);
983        final int UNIGRAM_COUNT = 1000;
984        final int BIGRAM_COUNT = 1000;
985        final int codePointSetSize = 20;
986        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
987
988        File dictFile = null;
989        try {
990            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
991        } catch (IOException e) {
992            fail("IOException while writing an initial dictionary : " + e);
993        }
994        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
995                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
996                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
997
998        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
999                false /* isBeginningOfSentence */);
1000        assertFalse(invalidWordProperty.isValid());
1001
1002        final ArrayList<String> words = new ArrayList<>();
1003        final HashMap<String, Integer> wordProbabilities = new HashMap<>();
1004        final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
1005        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
1006
1007        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1008            final String word = CodePointUtils.generateWord(random, codePointSet);
1009            final int unigramProbability = random.nextInt(0xFF);
1010            final boolean isNotAWord = random.nextBoolean();
1011            final boolean isBlacklisted = random.nextBoolean();
1012            // TODO: Add tests for historical info.
1013            binaryDictionary.addUnigramEntry(word, unigramProbability,
1014                    null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
1015                    false /* isBeginningOfSentence */, isNotAWord, isBlacklisted,
1016                    BinaryDictionary.NOT_A_VALID_TIMESTAMP);
1017            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1018                binaryDictionary.flushWithGC();
1019            }
1020            words.add(word);
1021            wordProbabilities.put(word, unigramProbability);
1022            final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
1023                    false /* isBeginningOfSentence */);
1024            assertEquals(word, wordProperty.mWord);
1025            assertTrue(wordProperty.isValid());
1026            assertEquals(isNotAWord, wordProperty.mIsNotAWord);
1027            assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
1028            assertEquals(false, wordProperty.mHasBigrams);
1029            assertEquals(false, wordProperty.mHasShortcuts);
1030            assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
1031            assertTrue(wordProperty.mShortcutTargets.isEmpty());
1032        }
1033
1034        for (int i = 0; i < BIGRAM_COUNT; i++) {
1035            final int word0Index = random.nextInt(wordProbabilities.size());
1036            final int word1Index = random.nextInt(wordProbabilities.size());
1037            if (word0Index == word1Index) {
1038                continue;
1039            }
1040            final String word0 = words.get(word0Index);
1041            final String word1 = words.get(word1Index);
1042            final int unigramProbability = wordProbabilities.get(word1);
1043            final int bigramProbability =
1044                    unigramProbability + random.nextInt(0xFF - unigramProbability);
1045            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1046            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1047                binaryDictionary.flushWithGC();
1048            }
1049            if (!bigrams.containsKey(word0)) {
1050                final HashSet<String> bigramWord1s = new HashSet<>();
1051                bigrams.put(word0, bigramWord1s);
1052            }
1053            bigrams.get(word0).add(word1);
1054            bigramProbabilities.put(new Pair<>(word0, word1), bigramProbability);
1055        }
1056
1057        for (int i = 0; i < words.size(); i++) {
1058            final String word0 = words.get(i);
1059            if (!bigrams.containsKey(word0)) {
1060                continue;
1061            }
1062            final HashSet<String> bigramWord1s = bigrams.get(word0);
1063            final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
1064                    false /* isBeginningOfSentence */);
1065            assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
1066            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
1067                final String word1 = wordProperty.mBigrams.get(j).mWord;
1068                assertTrue(bigramWord1s.contains(word1));
1069                if (canCheckBigramProbability(formatVersion)) {
1070                    final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
1071                    assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
1072                }
1073            }
1074        }
1075    }
1076
1077    public void testIterateAllWords() {
1078        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1079            testIterateAllWords(formatVersion);
1080        }
1081    }
1082
1083    private void testIterateAllWords(final int formatVersion) {
1084        final long seed = System.currentTimeMillis();
1085        final Random random = new Random(seed);
1086        final int UNIGRAM_COUNT = 1000;
1087        final int BIGRAM_COUNT = 1000;
1088        final int codePointSetSize = 20;
1089        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1090
1091        File dictFile = null;
1092        try {
1093            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1094        } catch (IOException e) {
1095            fail("IOException while writing an initial dictionary : " + e);
1096        }
1097        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1098                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1099                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1100
1101        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord",
1102                false /* isBeginningOfSentence */);
1103        assertFalse(invalidWordProperty.isValid());
1104
1105        final ArrayList<String> words = new ArrayList<>();
1106        final HashMap<String, Integer> wordProbabilitiesToCheckLater = new HashMap<>();
1107        final HashMap<String, HashSet<String>> bigrams = new HashMap<>();
1108        final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
1109                new HashMap<>();
1110
1111        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1112            final String word = CodePointUtils.generateWord(random, codePointSet);
1113            final int unigramProbability = random.nextInt(0xFF);
1114            addUnigramWord(binaryDictionary, word, unigramProbability);
1115            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1116                binaryDictionary.flushWithGC();
1117            }
1118            words.add(word);
1119            wordProbabilitiesToCheckLater.put(word, unigramProbability);
1120        }
1121
1122        for (int i = 0; i < BIGRAM_COUNT; i++) {
1123            final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1124            final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1125            if (word0Index == word1Index) {
1126                continue;
1127            }
1128            final String word0 = words.get(word0Index);
1129            final String word1 = words.get(word1Index);
1130            final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
1131            final int bigramProbability =
1132                    unigramProbability + random.nextInt(0xFF - unigramProbability);
1133            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1134            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1135                binaryDictionary.flushWithGC();
1136            }
1137            if (!bigrams.containsKey(word0)) {
1138                final HashSet<String> bigramWord1s = new HashSet<>();
1139                bigrams.put(word0, bigramWord1s);
1140            }
1141            bigrams.get(word0).add(word1);
1142            bigramProbabilitiesToCheckLater.put(new Pair<>(word0, word1), bigramProbability);
1143        }
1144
1145        final HashSet<String> wordSet = new HashSet<>(words);
1146        final HashSet<Pair<String, String>> bigramSet =
1147                new HashSet<>(bigramProbabilitiesToCheckLater.keySet());
1148        int token = 0;
1149        do {
1150            final BinaryDictionary.GetNextWordPropertyResult result =
1151                    binaryDictionary.getNextWordProperty(token);
1152            final WordProperty wordProperty = result.mWordProperty;
1153            final String word0 = wordProperty.mWord;
1154            assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
1155                    wordProperty.mProbabilityInfo.mProbability);
1156            wordSet.remove(word0);
1157            final HashSet<String> bigramWord1s = bigrams.get(word0);
1158            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
1159                final String word1 = wordProperty.mBigrams.get(j).mWord;
1160                assertTrue(bigramWord1s.contains(word1));
1161                final Pair<String, String> bigram = new Pair<>(word0, word1);
1162                if (canCheckBigramProbability(formatVersion)) {
1163                    final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
1164                    assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
1165                }
1166                bigramSet.remove(bigram);
1167            }
1168            token = result.mNextToken;
1169        } while (token != 0);
1170        assertTrue(wordSet.isEmpty());
1171        assertTrue(bigramSet.isEmpty());
1172    }
1173
1174    public void testAddShortcuts() {
1175        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1176            testAddShortcuts(formatVersion);
1177        }
1178    }
1179
1180    private void testAddShortcuts(final int formatVersion) {
1181        File dictFile = null;
1182        try {
1183            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1184        } catch (IOException e) {
1185            fail("IOException while writing an initial dictionary : " + e);
1186        }
1187        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1188                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1189                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1190
1191        final int unigramProbability = 100;
1192        final int shortcutProbability = 10;
1193        binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
1194                shortcutProbability, false /* isBeginningOfSentence */,
1195                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
1196        WordProperty wordProperty = binaryDictionary.getWordProperty("aaa",
1197                false /* isBeginningOfSentence */);
1198        assertEquals(1, wordProperty.mShortcutTargets.size());
1199        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1200        assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
1201        final int updatedShortcutProbability = 2;
1202        binaryDictionary.addUnigramEntry("aaa", unigramProbability, "zzz",
1203                updatedShortcutProbability, false /* isBeginningOfSentence */,
1204                false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */);
1205        wordProperty = binaryDictionary.getWordProperty("aaa",
1206                false /* isBeginningOfSentence */);
1207        assertEquals(1, wordProperty.mShortcutTargets.size());
1208        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1209        assertEquals(updatedShortcutProbability,
1210                wordProperty.mShortcutTargets.get(0).getProbability());
1211        binaryDictionary.addUnigramEntry("aaa", unigramProbability, "yyy",
1212                shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
1213                false /* isBlacklisted */, 0 /* timestamp */);
1214        final HashMap<String, Integer> shortcutTargets = new HashMap<>();
1215        shortcutTargets.put("zzz", updatedShortcutProbability);
1216        shortcutTargets.put("yyy", shortcutProbability);
1217        wordProperty = binaryDictionary.getWordProperty("aaa",
1218                false /* isBeginningOfSentence */);
1219        assertEquals(2, wordProperty.mShortcutTargets.size());
1220        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1221            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1222            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1223                    shortcutTarget.getProbability());
1224            shortcutTargets.remove(shortcutTarget.mWord);
1225        }
1226        shortcutTargets.put("zzz", updatedShortcutProbability);
1227        shortcutTargets.put("yyy", shortcutProbability);
1228        binaryDictionary.flushWithGC();
1229        wordProperty = binaryDictionary.getWordProperty("aaa",
1230                false /* isBeginningOfSentence */);
1231        assertEquals(2, wordProperty.mShortcutTargets.size());
1232        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1233            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1234            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1235                    shortcutTarget.getProbability());
1236            shortcutTargets.remove(shortcutTarget.mWord);
1237        }
1238    }
1239
1240    public void testAddManyShortcuts() {
1241        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1242            testAddManyShortcuts(formatVersion);
1243        }
1244    }
1245
1246    private void testAddManyShortcuts(final int formatVersion) {
1247        final long seed = System.currentTimeMillis();
1248        final Random random = new Random(seed);
1249        final int UNIGRAM_COUNT = 1000;
1250        final int SHORTCUT_COUNT = 10000;
1251        final int codePointSetSize = 20;
1252        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1253
1254        final ArrayList<String> words = new ArrayList<>();
1255        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
1256        final HashMap<String, HashMap<String, Integer>> shortcutTargets = new HashMap<>();
1257
1258        File dictFile = null;
1259        try {
1260            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1261        } catch (IOException e) {
1262            fail("IOException while writing an initial dictionary : " + e);
1263        }
1264        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1265                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1266                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1267
1268        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1269            final String word = CodePointUtils.generateWord(random, codePointSet);
1270            final int unigramProbability = random.nextInt(0xFF);
1271            addUnigramWord(binaryDictionary, word, unigramProbability);
1272            words.add(word);
1273            unigramProbabilities.put(word, unigramProbability);
1274            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1275                binaryDictionary.flushWithGC();
1276            }
1277        }
1278        for (int i = 0; i < SHORTCUT_COUNT; i++) {
1279            final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
1280            final int shortcutProbability = random.nextInt(0xF);
1281            final String word = words.get(random.nextInt(words.size()));
1282            final int unigramProbability = unigramProbabilities.get(word);
1283            binaryDictionary.addUnigramEntry(word, unigramProbability, shortcutTarget,
1284                    shortcutProbability, false /* isBeginningOfSentence */, false /* isNotAWord */,
1285                    false /* isBlacklisted */, 0 /* timestamp */);
1286            if (shortcutTargets.containsKey(word)) {
1287                final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
1288                shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1289            } else {
1290                final HashMap<String, Integer> shortcutTargetsOfWord = new HashMap<>();
1291                shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1292                shortcutTargets.put(word, shortcutTargetsOfWord);
1293            }
1294            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1295                binaryDictionary.flushWithGC();
1296            }
1297        }
1298
1299        for (final String word : words) {
1300            final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
1301                    false /* isBeginningOfSentence */);
1302            assertEquals((int)unigramProbabilities.get(word),
1303                    wordProperty.mProbabilityInfo.mProbability);
1304            if (!shortcutTargets.containsKey(word)) {
1305                // The word does not have shortcut targets.
1306                continue;
1307            }
1308            assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
1309            for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1310                final String targetCodePonts = shortcutTarget.mWord;
1311                assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
1312                        shortcutTarget.getProbability());
1313            }
1314        }
1315    }
1316
1317    public void testDictMigration() {
1318        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1319            testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
1320        }
1321    }
1322
1323    private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) {
1324        File dictFile = null;
1325        try {
1326            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
1327        } catch (IOException e) {
1328            fail("IOException while writing an initial dictionary : " + e);
1329        }
1330        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1331                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1332                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1333        final int unigramProbability = 100;
1334        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
1335        addUnigramWord(binaryDictionary, "bbb", unigramProbability);
1336        final int bigramProbability = 150;
1337        addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability);
1338        final int shortcutProbability = 10;
1339        binaryDictionary.addUnigramEntry("ccc", unigramProbability, "xxx", shortcutProbability,
1340                false /* isBeginningOfSentence */, false /* isNotAWord */,
1341                false /* isBlacklisted */, 0 /* timestamp */);
1342        binaryDictionary.addUnigramEntry("ddd", unigramProbability, null /* shortcutTarget */,
1343                Dictionary.NOT_A_PROBABILITY, false /* isBeginningOfSentence */,
1344                true /* isNotAWord */, true /* isBlacklisted */, 0 /* timestamp */);
1345        binaryDictionary.addNgramEntry(PrevWordsInfo.BEGINNING_OF_SENTENCE,
1346                "aaa", bigramProbability, 0 /* timestamp */);
1347        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
1348        assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
1349        assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
1350        assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion());
1351        assertTrue(binaryDictionary.migrateTo(toFormatVersion));
1352        assertTrue(binaryDictionary.isValidDictionary());
1353        assertEquals(toFormatVersion, binaryDictionary.getFormatVersion());
1354        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
1355        assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb"));
1356        if (canCheckBigramProbability(toFormatVersion)) {
1357            assertEquals(bigramProbability, getBigramProbability(binaryDictionary, "aaa", "bbb"));
1358            assertEquals(bigramProbability, binaryDictionary.getNgramProbability(
1359                    PrevWordsInfo.BEGINNING_OF_SENTENCE, "aaa"));
1360        }
1361        assertTrue(isValidBigram(binaryDictionary, "aaa", "bbb"));
1362        WordProperty wordProperty = binaryDictionary.getWordProperty("ccc",
1363                false /* isBeginningOfSentence */);
1364        assertEquals(1, wordProperty.mShortcutTargets.size());
1365        assertEquals("xxx", wordProperty.mShortcutTargets.get(0).mWord);
1366        wordProperty = binaryDictionary.getWordProperty("ddd",
1367                false /* isBeginningOfSentence */);
1368        assertTrue(wordProperty.mIsBlacklistEntry);
1369        assertTrue(wordProperty.mIsNotAWord);
1370    }
1371
1372    public void testLargeDictMigration() {
1373        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1374            testLargeDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, formatVersion);
1375        }
1376    }
1377
1378    private void testLargeDictMigration(final int fromFormatVersion, final int toFormatVersion) {
1379        final int UNIGRAM_COUNT = 3000;
1380        final int BIGRAM_COUNT = 3000;
1381        final int codePointSetSize = 50;
1382        final long seed = System.currentTimeMillis();
1383        final Random random = new Random(seed);
1384
1385        File dictFile = null;
1386        try {
1387            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion);
1388        } catch (IOException e) {
1389            fail("IOException while writing an initial dictionary : " + e);
1390        }
1391        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1392                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1393                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1394
1395        final ArrayList<String> words = new ArrayList<>();
1396        final ArrayList<Pair<String, String>> bigrams = new ArrayList<>();
1397        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1398        final HashMap<String, Integer> unigramProbabilities = new HashMap<>();
1399        final HashMap<Pair<String, String>, Integer> bigramProbabilities = new HashMap<>();
1400
1401        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1402            final String word = CodePointUtils.generateWord(random, codePointSet);
1403            final int unigramProbability = random.nextInt(0xFF);
1404            addUnigramWord(binaryDictionary, word, unigramProbability);
1405            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1406                binaryDictionary.flushWithGC();
1407            }
1408            words.add(word);
1409            unigramProbabilities.put(word, unigramProbability);
1410        }
1411
1412        for (int i = 0; i < BIGRAM_COUNT; i++) {
1413            final int word0Index = random.nextInt(words.size());
1414            final int word1Index = random.nextInt(words.size());
1415            if (word0Index == word1Index) {
1416                continue;
1417            }
1418            final String word0 = words.get(word0Index);
1419            final String word1 = words.get(word1Index);
1420            final int unigramProbability = unigramProbabilities.get(word1);
1421            final int bigramProbability =
1422                    random.nextInt(0xFF - unigramProbability) + unigramProbability;
1423            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
1424            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1425                binaryDictionary.flushWithGC();
1426            }
1427            final Pair<String, String> bigram = new Pair<>(word0, word1);
1428            bigrams.add(bigram);
1429            bigramProbabilities.put(bigram, bigramProbability);
1430        }
1431        assertTrue(binaryDictionary.migrateTo(toFormatVersion));
1432
1433        for (final String word : words) {
1434            assertEquals((int)unigramProbabilities.get(word), binaryDictionary.getFrequency(word));
1435        }
1436        assertEquals(unigramProbabilities.size(), Integer.parseInt(
1437                binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
1438
1439        for (final Pair<String, String> bigram : bigrams) {
1440            if (canCheckBigramProbability(toFormatVersion)) {
1441                assertEquals((int)bigramProbabilities.get(bigram),
1442                        getBigramProbability(binaryDictionary, bigram.first, bigram.second));
1443            }
1444            assertTrue(isValidBigram(binaryDictionary, bigram.first, bigram.second));
1445        }
1446        assertEquals(bigramProbabilities.size(), Integer.parseInt(
1447                binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
1448    }
1449
1450    public void testBeginningOfSentence() {
1451        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1452            if (supportsBeginningOfSentence(formatVersion)) {
1453                testBeginningOfSentence(formatVersion);
1454            }
1455        }
1456    }
1457
1458    private void testBeginningOfSentence(final int formatVersion) {
1459        File dictFile = null;
1460        try {
1461            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1462        } catch (IOException e) {
1463            fail("IOException while writing an initial dictionary : " + e);
1464        }
1465        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1466                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1467                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1468        final int dummyProbability = 0;
1469        final PrevWordsInfo prevWordsInfoBeginningOfSentence = PrevWordsInfo.BEGINNING_OF_SENTENCE;
1470        final int bigramProbability = 200;
1471        addUnigramWord(binaryDictionary, "aaa", dummyProbability);
1472        binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
1473                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1474        assertEquals(bigramProbability,
1475                binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
1476        binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "aaa", bigramProbability,
1477                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1478        addUnigramWord(binaryDictionary, "bbb", dummyProbability);
1479        binaryDictionary.addNgramEntry(prevWordsInfoBeginningOfSentence, "bbb", bigramProbability,
1480                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
1481        binaryDictionary.flushWithGC();
1482        assertEquals(bigramProbability,
1483                binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "aaa"));
1484        assertEquals(bigramProbability,
1485                binaryDictionary.getNgramProbability(prevWordsInfoBeginningOfSentence, "bbb"));
1486    }
1487
1488    public void testGetMaxFrequencyOfExactMatches() {
1489        for (final int formatVersion : DICT_FORMAT_VERSIONS) {
1490            testGetMaxFrequencyOfExactMatches(formatVersion);
1491        }
1492    }
1493
1494    private void testGetMaxFrequencyOfExactMatches(final int formatVersion) {
1495        File dictFile = null;
1496        try {
1497            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1498        } catch (IOException e) {
1499            fail("IOException while writing an initial dictionary : " + e);
1500        }
1501        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1502                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1503                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1504        addUnigramWord(binaryDictionary, "abc", 10);
1505        addUnigramWord(binaryDictionary, "aBc", 15);
1506        assertEquals(15, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1507        addUnigramWord(binaryDictionary, "ab'c", 20);
1508        assertEquals(20, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1509        addUnigramWord(binaryDictionary, "a-b-c", 25);
1510        assertEquals(25, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1511        addUnigramWord(binaryDictionary, "ab-'-'-'-c", 30);
1512        assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1513        addUnigramWord(binaryDictionary, "ab c", 255);
1514        assertEquals(30, binaryDictionary.getMaxFrequencyOfExactMatches("abc"));
1515    }
1516}
1517