BinaryDictionaryTests.java revision 98705b6bf544cff4d781fae8b1ef4e3fabc6b2a3
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.test.AndroidTestCase;
20import android.test.suitebuilder.annotation.LargeTest;
21import android.text.TextUtils;
22import android.util.Pair;
23
24import com.android.inputmethod.latin.makedict.CodePointUtils;
25import com.android.inputmethod.latin.makedict.FormatSpec;
26import com.android.inputmethod.latin.makedict.WeightedString;
27import com.android.inputmethod.latin.makedict.WordProperty;
28import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
29import com.android.inputmethod.latin.utils.FileUtils;
30import com.android.inputmethod.latin.utils.LanguageModelParam;
31
32import java.io.File;
33import java.io.IOException;
34import java.util.ArrayList;
35import java.util.HashMap;
36import java.util.HashSet;
37import java.util.Locale;
38import java.util.Map;
39import java.util.Random;
40
41// TODO Use the seed passed as an argument for makedict test.
42@LargeTest
43public class BinaryDictionaryTests extends AndroidTestCase {
44    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
45    private static final String TEST_LOCALE = "test";
46
47    private File createEmptyDictionaryAndGetFile(final String dictId,
48            final int formatVersion) throws IOException {
49       if (formatVersion == FormatSpec.VERSION4) {
50            return createEmptyVer4DictionaryAndGetFile(dictId);
51        } else {
52            throw new IOException("Dictionary format version " + formatVersion
53                    + " is not supported.");
54        }
55    }
56
57    private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException {
58        final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION,
59                getContext().getCacheDir());
60        file.delete();
61        file.mkdir();
62        Map<String, String> attributeMap = new HashMap<String, String>();
63        if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4,
64                Locale.ENGLISH, attributeMap)) {
65            return file;
66        } else {
67            throw new IOException("Empty dictionary " + file.getAbsolutePath()
68                    + " cannot be created.");
69        }
70    }
71
72    public void testIsValidDictionary() {
73        testIsValidDictionary(FormatSpec.VERSION4);
74    }
75
76    private void testIsValidDictionary(final int formatVersion) {
77        File dictFile = null;
78        try {
79            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
80        } catch (IOException e) {
81            fail("IOException while writing an initial dictionary : " + e);
82        }
83        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
84                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
85                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
86        assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
87                binaryDictionary.isValidDictionary());
88        binaryDictionary.close();
89        assertFalse("binaryDictionary must be invalid after closing.",
90                binaryDictionary.isValidDictionary());
91        FileUtils.deleteRecursively(dictFile);
92        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
93                dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
94                TEST_LOCALE, true /* isUpdatable */);
95        assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
96                binaryDictionary.isValidDictionary());
97        binaryDictionary.close();
98    }
99
100    public void testAddTooLongWord() {
101        testAddTooLongWord(FormatSpec.VERSION4);
102    }
103
104    private void testAddTooLongWord(final int formatVersion) {
105        File dictFile = null;
106        try {
107            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
108        } catch (IOException e) {
109            fail("IOException while writing an initial dictionary : " + e);
110        }
111        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
112                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
113                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
114
115        final StringBuffer stringBuilder = new StringBuffer();
116        for (int i = 0; i < Constants.DICTIONARY_MAX_WORD_LENGTH; i++) {
117            stringBuilder.append('a');
118        }
119        final String validLongWord = stringBuilder.toString();
120        stringBuilder.append('a');
121        final String invalidLongWord = stringBuilder.toString();
122        final int probability = 100;
123        addUnigramWord(binaryDictionary, "aaa", probability);
124        addUnigramWord(binaryDictionary, validLongWord, probability);
125        addUnigramWord(binaryDictionary, invalidLongWord, probability);
126        // Too long short cut.
127        binaryDictionary.addUnigramWord("a", probability, invalidLongWord,
128                10 /* shortcutProbability */, false /* isNotAWord */, false /* isBlacklisted */,
129                BinaryDictionary.NOT_A_VALID_TIMESTAMP);
130        addUnigramWord(binaryDictionary, "abc", probability);
131        final int updatedProbability = 200;
132        // Update.
133        addUnigramWord(binaryDictionary, validLongWord, updatedProbability);
134        addUnigramWord(binaryDictionary, invalidLongWord, updatedProbability);
135        addUnigramWord(binaryDictionary, "abc", updatedProbability);
136
137        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
138        assertEquals(updatedProbability, binaryDictionary.getFrequency(validLongWord));
139        assertEquals(BinaryDictionary.NOT_A_PROBABILITY,
140                binaryDictionary.getFrequency(invalidLongWord));
141        assertEquals(updatedProbability, binaryDictionary.getFrequency("abc"));
142        dictFile.delete();
143    }
144
145    private void addUnigramWord(final BinaryDictionary binaryDictionary, final String word,
146            final int probability) {
147        binaryDictionary.addUnigramWord(word, probability, "" /* shortcutTarget */,
148                BinaryDictionary.NOT_A_PROBABILITY /* shortcutProbability */,
149                false /* isNotAWord */, false /* isBlacklisted */,
150                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
151    }
152
153    private void addBigramWords(final BinaryDictionary binaryDictionary, final String word0,
154            final String word1, final int probability) {
155        binaryDictionary.addBigramWords(word0, word1, probability,
156                BinaryDictionary.NOT_A_VALID_TIMESTAMP /* timestamp */);
157    }
158
159    public void testAddUnigramWord() {
160        testAddUnigramWord(FormatSpec.VERSION4);
161    }
162
163    private void testAddUnigramWord(final int formatVersion) {
164        File dictFile = null;
165        try {
166            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
167        } catch (IOException e) {
168            fail("IOException while writing an initial dictionary : " + e);
169        }
170        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
171                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
172                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
173
174        final int probability = 100;
175        addUnigramWord(binaryDictionary, "aaa", probability);
176        // Reallocate and create.
177        addUnigramWord(binaryDictionary, "aab", probability);
178        // Insert into children.
179        addUnigramWord(binaryDictionary, "aac", probability);
180        // Make terminal.
181        addUnigramWord(binaryDictionary, "aa", probability);
182        // Create children.
183        addUnigramWord(binaryDictionary, "aaaa", probability);
184        // Reallocate and make termianl.
185        addUnigramWord(binaryDictionary, "a", probability);
186
187        final int updatedProbability = 200;
188        // Update.
189        addUnigramWord(binaryDictionary, "aaa", updatedProbability);
190
191        assertEquals(probability, binaryDictionary.getFrequency("aab"));
192        assertEquals(probability, binaryDictionary.getFrequency("aac"));
193        assertEquals(probability, binaryDictionary.getFrequency("aa"));
194        assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
195        assertEquals(probability, binaryDictionary.getFrequency("a"));
196        assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
197
198        dictFile.delete();
199    }
200
201    public void testRandomlyAddUnigramWord() {
202        testRandomlyAddUnigramWord(FormatSpec.VERSION4);
203    }
204
205    private void testRandomlyAddUnigramWord(final int formatVersion) {
206        final int wordCount = 1000;
207        final int codePointSetSize = 50;
208        final long seed = System.currentTimeMillis();
209
210        File dictFile = null;
211        try {
212            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
213        } catch (IOException e) {
214            fail("IOException while writing an initial dictionary : " + e);
215        }
216        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
217                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
218                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
219
220        final HashMap<String, Integer> probabilityMap = new HashMap<String, Integer>();
221        // Test a word that isn't contained within the dictionary.
222        final Random random = new Random(seed);
223        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
224        for (int i = 0; i < wordCount; ++i) {
225            final String word = CodePointUtils.generateWord(random, codePointSet);
226            probabilityMap.put(word, random.nextInt(0xFF));
227        }
228        for (String word : probabilityMap.keySet()) {
229            addUnigramWord(binaryDictionary, word, probabilityMap.get(word));
230        }
231        for (String word : probabilityMap.keySet()) {
232            assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
233        }
234        dictFile.delete();
235    }
236
237    public void testAddBigramWords() {
238        testAddBigramWords(FormatSpec.VERSION4);
239    }
240
241    private void testAddBigramWords(final int formatVersion) {
242        File dictFile = null;
243        try {
244            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
245        } catch (IOException e) {
246            fail("IOException while writing an initial dictionary : " + e);
247        }
248        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
249                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
250                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
251
252        final int unigramProbability = 100;
253        final int bigramProbability = 10;
254        final int updatedBigramProbability = 15;
255        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
256        addUnigramWord(binaryDictionary, "abb", unigramProbability);
257        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
258        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
259        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
260        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
261        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
262
263        final int probability = binaryDictionary.calculateProbability(unigramProbability,
264                bigramProbability);
265        assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
266        assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
267        assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
268        assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
269        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
270        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
271        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
272        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
273
274        addBigramWords(binaryDictionary, "aaa", "abb", updatedBigramProbability);
275        final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability,
276                updatedBigramProbability);
277        assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
278
279        assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
280        assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
281        assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
282        assertEquals(Dictionary.NOT_A_PROBABILITY,
283                binaryDictionary.getBigramProbability("bcc", "aaa"));
284        assertEquals(Dictionary.NOT_A_PROBABILITY,
285                binaryDictionary.getBigramProbability("bcc", "bbc"));
286        assertEquals(Dictionary.NOT_A_PROBABILITY,
287                binaryDictionary.getBigramProbability("aaa", "aaa"));
288
289        // Testing bigram link.
290        addUnigramWord(binaryDictionary, "abcde", unigramProbability);
291        addUnigramWord(binaryDictionary, "fghij", unigramProbability);
292        addBigramWords(binaryDictionary, "abcde", "fghij", bigramProbability);
293        addUnigramWord(binaryDictionary, "fgh", unigramProbability);
294        addUnigramWord(binaryDictionary, "abc", unigramProbability);
295        addUnigramWord(binaryDictionary, "f", unigramProbability);
296        assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
297        assertEquals(Dictionary.NOT_A_PROBABILITY,
298                binaryDictionary.getBigramProbability("abcde", "fgh"));
299        addBigramWords(binaryDictionary, "abcde", "fghij", updatedBigramProbability);
300        assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij"));
301
302        dictFile.delete();
303    }
304
305    public void testRandomlyAddBigramWords() {
306        testRandomlyAddBigramWords(FormatSpec.VERSION4);
307    }
308
309    private void testRandomlyAddBigramWords(final int formatVersion) {
310        final int wordCount = 100;
311        final int bigramCount = 1000;
312        final int codePointSetSize = 50;
313        final long seed = System.currentTimeMillis();
314        final Random random = new Random(seed);
315
316        File dictFile = null;
317        try {
318            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
319        } catch (IOException e) {
320            fail("IOException while writing an initial dictionary : " + e);
321        }
322        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
323                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
324                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
325
326        final ArrayList<String> words = new ArrayList<String>();
327        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
328        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
329        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
330        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
331                new HashMap<Pair<String, String>, Integer>();
332
333        for (int i = 0; i < wordCount; ++i) {
334            final String word = CodePointUtils.generateWord(random, codePointSet);
335            words.add(word);
336            final int unigramProbability = random.nextInt(0xFF);
337            unigramProbabilities.put(word, unigramProbability);
338            addUnigramWord(binaryDictionary, word, unigramProbability);
339        }
340
341        for (int i = 0; i < bigramCount; i++) {
342            final String word0 = words.get(random.nextInt(wordCount));
343            final String word1 = words.get(random.nextInt(wordCount));
344            if (TextUtils.equals(word0, word1)) {
345                continue;
346            }
347            final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
348            bigramWords.add(bigram);
349            final int bigramProbability = random.nextInt(0xF);
350            bigramProbabilities.put(bigram, bigramProbability);
351            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
352        }
353
354        for (final Pair<String, String> bigram : bigramWords) {
355            final int unigramProbability = unigramProbabilities.get(bigram.second);
356            final int bigramProbability = bigramProbabilities.get(bigram);
357            final int probability = binaryDictionary.calculateProbability(unigramProbability,
358                    bigramProbability);
359            assertEquals(probability,
360                    binaryDictionary.getBigramProbability(bigram.first, bigram.second));
361        }
362
363        dictFile.delete();
364    }
365
366    public void testRemoveBigramWords() {
367        testRemoveBigramWords(FormatSpec.VERSION4);
368    }
369
370    private void testRemoveBigramWords(final int formatVersion) {
371        File dictFile = null;
372        try {
373            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
374        } catch (IOException e) {
375            fail("IOException while writing an initial dictionary : " + e);
376        }
377        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
378                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
379                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
380        final int unigramProbability = 100;
381        final int bigramProbability = 10;
382        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
383        addUnigramWord(binaryDictionary, "abb", unigramProbability);
384        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
385        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
386        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
387        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
388        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
389
390        assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
391        assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
392        assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
393        assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
394
395        binaryDictionary.removeBigramWords("aaa", "abb");
396        assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
397        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
398        assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
399
400
401        binaryDictionary.removeBigramWords("aaa", "bcc");
402        assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
403        binaryDictionary.removeBigramWords("abb", "aaa");
404        assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
405        binaryDictionary.removeBigramWords("abb", "bcc");
406        assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
407
408        binaryDictionary.removeBigramWords("aaa", "abb");
409        // Test remove non-existing bigram operation.
410        binaryDictionary.removeBigramWords("aaa", "abb");
411        binaryDictionary.removeBigramWords("bcc", "aaa");
412
413        dictFile.delete();
414    }
415
416    public void testFlushDictionary() {
417        testFlushDictionary(FormatSpec.VERSION4);
418    }
419
420    private void testFlushDictionary(final int formatVersion) {
421        File dictFile = null;
422        try {
423            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
424        } catch (IOException e) {
425            fail("IOException while writing an initial dictionary : " + e);
426        }
427        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
428                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
429                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
430
431        final int probability = 100;
432        addUnigramWord(binaryDictionary, "aaa", probability);
433        addUnigramWord(binaryDictionary, "abcd", probability);
434        // Close without flushing.
435        binaryDictionary.close();
436
437        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
438                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
439                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
440
441        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
442        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
443
444        addUnigramWord(binaryDictionary, "aaa", probability);
445        addUnigramWord(binaryDictionary, "abcd", probability);
446        binaryDictionary.flush();
447        binaryDictionary.close();
448
449        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
450                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
451                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
452
453        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
454        assertEquals(probability, binaryDictionary.getFrequency("abcd"));
455        addUnigramWord(binaryDictionary, "bcde", probability);
456        binaryDictionary.flush();
457        binaryDictionary.close();
458
459        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
460                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
461                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
462        assertEquals(probability, binaryDictionary.getFrequency("bcde"));
463        binaryDictionary.close();
464
465        dictFile.delete();
466    }
467
468    public void testFlushWithGCDictionary() {
469        testFlushWithGCDictionary(FormatSpec.VERSION4);
470    }
471
472    private void testFlushWithGCDictionary(final int formatVersion) {
473        File dictFile = null;
474        try {
475            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
476        } catch (IOException e) {
477            fail("IOException while writing an initial dictionary : " + e);
478        }
479        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
480                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
481                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
482
483        final int unigramProbability = 100;
484        final int bigramProbability = 10;
485        addUnigramWord(binaryDictionary, "aaa", unigramProbability);
486        addUnigramWord(binaryDictionary, "abb", unigramProbability);
487        addUnigramWord(binaryDictionary, "bcc", unigramProbability);
488        addBigramWords(binaryDictionary, "aaa", "abb", bigramProbability);
489        addBigramWords(binaryDictionary, "aaa", "bcc", bigramProbability);
490        addBigramWords(binaryDictionary, "abb", "aaa", bigramProbability);
491        addBigramWords(binaryDictionary, "abb", "bcc", bigramProbability);
492        binaryDictionary.flushWithGC();
493        binaryDictionary.close();
494
495        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
496                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
497                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
498        final int probability = binaryDictionary.calculateProbability(unigramProbability,
499                bigramProbability);
500        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
501        assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
502        assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
503        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
504        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
505        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
506        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
507        assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
508        assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
509        assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
510        binaryDictionary.flushWithGC();
511        binaryDictionary.close();
512
513        dictFile.delete();
514    }
515
516    public void testAddBigramWordsAndFlashWithGC() {
517        testAddBigramWordsAndFlashWithGC(FormatSpec.VERSION4);
518    }
519
520    // TODO: Evaluate performance of GC
521    private void testAddBigramWordsAndFlashWithGC(final int formatVersion) {
522        final int wordCount = 100;
523        final int bigramCount = 1000;
524        final int codePointSetSize = 30;
525        final long seed = System.currentTimeMillis();
526        final Random random = new Random(seed);
527
528        File dictFile = null;
529        try {
530            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
531        } catch (IOException e) {
532            fail("IOException while writing an initial dictionary : " + e);
533        }
534
535        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
536                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
537                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
538
539        final ArrayList<String> words = new ArrayList<String>();
540        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
541        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
542        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
543        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
544                new HashMap<Pair<String, String>, Integer>();
545
546        for (int i = 0; i < wordCount; ++i) {
547            final String word = CodePointUtils.generateWord(random, codePointSet);
548            words.add(word);
549            final int unigramProbability = random.nextInt(0xFF);
550            unigramProbabilities.put(word, unigramProbability);
551            addUnigramWord(binaryDictionary, word, unigramProbability);
552        }
553
554        for (int i = 0; i < bigramCount; i++) {
555            final String word0 = words.get(random.nextInt(wordCount));
556            final String word1 = words.get(random.nextInt(wordCount));
557            if (TextUtils.equals(word0, word1)) {
558                continue;
559            }
560            final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
561            bigramWords.add(bigram);
562            final int bigramProbability = random.nextInt(0xF);
563            bigramProbabilities.put(bigram, bigramProbability);
564            addBigramWords(binaryDictionary, word0, word1, bigramProbability);
565        }
566
567        binaryDictionary.flushWithGC();
568        binaryDictionary.close();
569        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
570                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
571                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
572
573        for (final Pair<String, String> bigram : bigramWords) {
574            final int unigramProbability = unigramProbabilities.get(bigram.second);
575            final int bigramProbability = bigramProbabilities.get(bigram);
576            final int probability = binaryDictionary.calculateProbability(unigramProbability,
577                    bigramProbability);
578            assertEquals(probability,
579                    binaryDictionary.getBigramProbability(bigram.first, bigram.second));
580        }
581
582        dictFile.delete();
583    }
584
585    public void testRandomOperationsAndFlashWithGC() {
586        testRandomOperationsAndFlashWithGC(FormatSpec.VERSION4);
587    }
588
589    private void testRandomOperationsAndFlashWithGC(final int formatVersion) {
590        final int flashWithGCIterationCount = 50;
591        final int operationCountInEachIteration = 200;
592        final int initialUnigramCount = 100;
593        final float addUnigramProb = 0.5f;
594        final float addBigramProb = 0.8f;
595        final float removeBigramProb = 0.2f;
596        final int codePointSetSize = 30;
597
598        final long seed = System.currentTimeMillis();
599        final Random random = new Random(seed);
600
601        File dictFile = null;
602        try {
603            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
604        } catch (IOException e) {
605            fail("IOException while writing an initial dictionary : " + e);
606        }
607
608        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
609                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
610                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
611        final ArrayList<String> words = new ArrayList<String>();
612        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
613        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
614        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
615        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
616                new HashMap<Pair<String, String>, Integer>();
617        for (int i = 0; i < initialUnigramCount; ++i) {
618            final String word = CodePointUtils.generateWord(random, codePointSet);
619            words.add(word);
620            final int unigramProbability = random.nextInt(0xFF);
621            unigramProbabilities.put(word, unigramProbability);
622            addUnigramWord(binaryDictionary, word, unigramProbability);
623        }
624        binaryDictionary.flushWithGC();
625        binaryDictionary.close();
626
627        for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
628            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
629                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
630                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
631            for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
632                // Add unigram.
633                if (random.nextFloat() < addUnigramProb) {
634                    final String word = CodePointUtils.generateWord(random, codePointSet);
635                    words.add(word);
636                    final int unigramProbability = random.nextInt(0xFF);
637                    unigramProbabilities.put(word, unigramProbability);
638                    addUnigramWord(binaryDictionary, word, unigramProbability);
639                }
640                // Add bigram.
641                if (random.nextFloat() < addBigramProb && words.size() > 2) {
642                    final int word0Index = random.nextInt(words.size());
643                    int word1Index = random.nextInt(words.size() - 1);
644                    if (word0Index <= word1Index) {
645                        word1Index++;
646                    }
647                    final String word0 = words.get(word0Index);
648                    final String word1 = words.get(word1Index);
649                    if (TextUtils.equals(word0, word1)) {
650                        continue;
651                    }
652                    final int bigramProbability = random.nextInt(0xF);
653                    final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
654                    bigramWords.add(bigram);
655                    bigramProbabilities.put(bigram, bigramProbability);
656                    addBigramWords(binaryDictionary, word0, word1, bigramProbability);
657                }
658                // Remove bigram.
659                if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
660                    final int bigramIndex = random.nextInt(bigramWords.size());
661                    final Pair<String, String> bigram = bigramWords.get(bigramIndex);
662                    bigramWords.remove(bigramIndex);
663                    bigramProbabilities.remove(bigram);
664                    binaryDictionary.removeBigramWords(bigram.first, bigram.second);
665                }
666            }
667
668            // Test whether the all unigram operations are collectlly handled.
669            for (int i = 0; i < words.size(); i++) {
670                final String word = words.get(i);
671                final int unigramProbability = unigramProbabilities.get(word);
672                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
673            }
674            // Test whether the all bigram operations are collectlly handled.
675            for (int i = 0; i < bigramWords.size(); i++) {
676                final Pair<String, String> bigram = bigramWords.get(i);
677                final int unigramProbability = unigramProbabilities.get(bigram.second);
678                final int probability;
679                if (bigramProbabilities.containsKey(bigram)) {
680                    final int bigramProbability = bigramProbabilities.get(bigram);
681                    probability = binaryDictionary.calculateProbability(unigramProbability,
682                            bigramProbability);
683                } else {
684                    probability = Dictionary.NOT_A_PROBABILITY;
685                }
686                assertEquals(probability,
687                        binaryDictionary.getBigramProbability(bigram.first, bigram.second));
688            }
689            binaryDictionary.flushWithGC();
690            binaryDictionary.close();
691        }
692
693        dictFile.delete();
694    }
695
696    public void testAddManyUnigramsAndFlushWithGC() {
697        testAddManyUnigramsAndFlushWithGC(FormatSpec.VERSION4);
698    }
699
700    private void testAddManyUnigramsAndFlushWithGC(final int formatVersion) {
701        final int flashWithGCIterationCount = 3;
702        final int codePointSetSize = 50;
703
704        final long seed = System.currentTimeMillis();
705        final Random random = new Random(seed);
706
707        File dictFile = null;
708        try {
709            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
710        } catch (IOException e) {
711            fail("IOException while writing an initial dictionary : " + e);
712        }
713
714        final ArrayList<String> words = new ArrayList<String>();
715        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
716        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
717
718        BinaryDictionary binaryDictionary;
719        for (int i = 0; i < flashWithGCIterationCount; i++) {
720            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
721                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
722                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
723            while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
724                final String word = CodePointUtils.generateWord(random, codePointSet);
725                words.add(word);
726                final int unigramProbability = random.nextInt(0xFF);
727                unigramProbabilities.put(word, unigramProbability);
728                addUnigramWord(binaryDictionary, word, unigramProbability);
729            }
730
731            for (int j = 0; j < words.size(); j++) {
732                final String word = words.get(j);
733                final int unigramProbability = unigramProbabilities.get(word);
734                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
735            }
736
737            binaryDictionary.flushWithGC();
738            binaryDictionary.close();
739        }
740
741        dictFile.delete();
742    }
743
744    public void testUnigramAndBigramCount() {
745        testUnigramAndBigramCount(FormatSpec.VERSION4);
746    }
747
748    private void testUnigramAndBigramCount(final int formatVersion) {
749        final int flashWithGCIterationCount = 10;
750        final int codePointSetSize = 50;
751        final int unigramCountPerIteration = 1000;
752        final int bigramCountPerIteration = 2000;
753        final long seed = System.currentTimeMillis();
754        final Random random = new Random(seed);
755
756        File dictFile = null;
757        try {
758            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
759        } catch (IOException e) {
760            fail("IOException while writing an initial dictionary : " + e);
761        }
762
763        final ArrayList<String> words = new ArrayList<String>();
764        final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>();
765        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
766
767        BinaryDictionary binaryDictionary;
768        for (int i = 0; i < flashWithGCIterationCount; i++) {
769            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
770                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
771                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
772            for (int j = 0; j < unigramCountPerIteration; j++) {
773                final String word = CodePointUtils.generateWord(random, codePointSet);
774                words.add(word);
775                final int unigramProbability = random.nextInt(0xFF);
776                addUnigramWord(binaryDictionary, word, unigramProbability);
777            }
778            for (int j = 0; j < bigramCountPerIteration; j++) {
779                final String word0 = words.get(random.nextInt(words.size()));
780                final String word1 = words.get(random.nextInt(words.size()));
781                if (TextUtils.equals(word0, word1)) {
782                    continue;
783                }
784                bigrams.add(new Pair<String, String>(word0, word1));
785                final int bigramProbability = random.nextInt(0xF);
786                addBigramWords(binaryDictionary, word0, word1, bigramProbability);
787            }
788            assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
789                    binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
790            assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
791                    binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
792            binaryDictionary.flushWithGC();
793            assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
794                    binaryDictionary.getPropertyForTest(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
795            assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
796                    binaryDictionary.getPropertyForTest(BinaryDictionary.BIGRAM_COUNT_QUERY)));
797            binaryDictionary.close();
798        }
799
800        dictFile.delete();
801    }
802
803    public void testAddMultipleDictionaryEntries() {
804        testAddMultipleDictionaryEntries(FormatSpec.VERSION4);
805    }
806
807    private void testAddMultipleDictionaryEntries(final int formatVersion) {
808        final int codePointSetSize = 20;
809        final int lmParamCount = 1000;
810        final double bigramContinueRate = 0.9;
811        final long seed = System.currentTimeMillis();
812        final Random random = new Random(seed);
813
814        File dictFile = null;
815        try {
816            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
817        } catch (IOException e) {
818            fail("IOException while writing an initial dictionary : " + e);
819        }
820
821        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
822        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
823        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
824                new HashMap<Pair<String, String>, Integer>();
825
826        final LanguageModelParam[] languageModelParams = new LanguageModelParam[lmParamCount];
827        String prevWord = null;
828        for (int i = 0; i < languageModelParams.length; i++) {
829            final String word = CodePointUtils.generateWord(random, codePointSet);
830            final int probability = random.nextInt(0xFF);
831            final int bigramProbability = random.nextInt(0xF);
832            unigramProbabilities.put(word, probability);
833            if (prevWord == null) {
834                languageModelParams[i] = new LanguageModelParam(word, probability,
835                        BinaryDictionary.NOT_A_VALID_TIMESTAMP);
836            } else {
837                languageModelParams[i] = new LanguageModelParam(prevWord, word, probability,
838                        bigramProbability, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
839                bigramProbabilities.put(new Pair<String, String>(prevWord, word),
840                        bigramProbability);
841            }
842            prevWord = (random.nextDouble() < bigramContinueRate) ? word : null;
843        }
844
845        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
846                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
847                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
848        binaryDictionary.addMultipleDictionaryEntries(languageModelParams);
849
850        for (Map.Entry<String, Integer> entry : unigramProbabilities.entrySet()) {
851            assertEquals((int)entry.getValue(), binaryDictionary.getFrequency(entry.getKey()));
852        }
853
854        for (Map.Entry<Pair<String, String>, Integer> entry : bigramProbabilities.entrySet()) {
855            final String word0 = entry.getKey().first;
856            final String word1 = entry.getKey().second;
857            final int unigramProbability = unigramProbabilities.get(word1);
858            final int bigramProbability = entry.getValue();
859            final int probability = binaryDictionary.calculateProbability(
860                    unigramProbability, bigramProbability);
861            assertEquals(probability, binaryDictionary.getBigramProbability(word0, word1));
862        }
863    }
864
865    public void testGetWordProperties() {
866        testGetWordProperties(FormatSpec.VERSION4);
867    }
868
869    private void testGetWordProperties(final int formatVersion) {
870        final long seed = System.currentTimeMillis();
871        final Random random = new Random(seed);
872        final int UNIGRAM_COUNT = 1000;
873        final int BIGRAM_COUNT = 1000;
874        final int codePointSetSize = 20;
875        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
876
877        File dictFile = null;
878        try {
879            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
880        } catch (IOException e) {
881            fail("IOException while writing an initial dictionary : " + e);
882        }
883        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
884                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
885                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
886
887        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
888        assertFalse(invalidWordProperty.isValid());
889
890        final ArrayList<String> words = new ArrayList<String>();
891        final HashMap<String, Integer> wordProbabilities = new HashMap<String, Integer>();
892        final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>();
893        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
894                new HashMap<Pair<String, String>, Integer>();
895
896        for (int i = 0; i < UNIGRAM_COUNT; i++) {
897            final String word = CodePointUtils.generateWord(random, codePointSet);
898            final int unigramProbability = random.nextInt(0xFF);
899            final boolean isNotAWord = random.nextBoolean();
900            final boolean isBlacklisted = random.nextBoolean();
901            // TODO: Add tests for historical info.
902            binaryDictionary.addUnigramWord(word, unigramProbability,
903                    null /* shortcutTarget */, BinaryDictionary.NOT_A_PROBABILITY,
904                    isNotAWord, isBlacklisted, BinaryDictionary.NOT_A_VALID_TIMESTAMP);
905            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
906                binaryDictionary.flushWithGC();
907            }
908            words.add(word);
909            wordProbabilities.put(word, unigramProbability);
910            final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
911            assertEquals(word, wordProperty.mWord);
912            assertTrue(wordProperty.isValid());
913            assertEquals(isNotAWord, wordProperty.mIsNotAWord);
914            assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
915            assertEquals(false, wordProperty.mHasBigrams);
916            assertEquals(false, wordProperty.mHasShortcuts);
917            assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
918            assertTrue(wordProperty.mShortcutTargets.isEmpty());
919        }
920
921        for (int i = 0; i < BIGRAM_COUNT; i++) {
922            final int word0Index = random.nextInt(wordProbabilities.size());
923            final int word1Index = random.nextInt(wordProbabilities.size());
924            if (word0Index == word1Index) {
925                continue;
926            }
927            final String word0 = words.get(word0Index);
928            final String word1 = words.get(word1Index);
929            final int bigramProbability = random.nextInt(0xF);
930            binaryDictionary.addBigramWords(word0, word1, bigramProbability,
931                    BinaryDictionary.NOT_A_VALID_TIMESTAMP);
932            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
933                binaryDictionary.flushWithGC();
934            }
935            if (!bigrams.containsKey(word0)) {
936                final HashSet<String> bigramWord1s = new HashSet<String>();
937                bigrams.put(word0, bigramWord1s);
938            }
939            bigrams.get(word0).add(word1);
940            bigramProbabilities.put(new Pair<String, String>(word0, word1), bigramProbability);
941        }
942
943        for (int i = 0; i < words.size(); i++) {
944            final String word0 = words.get(i);
945            if (!bigrams.containsKey(word0)) {
946                continue;
947            }
948            final HashSet<String> bigramWord1s = bigrams.get(word0);
949            final WordProperty wordProperty = binaryDictionary.getWordProperty(word0);
950            assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
951            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
952                final String word1 = wordProperty.mBigrams.get(j).mWord;
953                assertTrue(bigramWord1s.contains(word1));
954                final int bigramProbabilityDelta = bigramProbabilities.get(
955                        new Pair<String, String>(word0, word1));
956                final int unigramProbability = wordProbabilities.get(word1);
957                final int bigramProbablity = binaryDictionary.calculateProbability(
958                        unigramProbability, bigramProbabilityDelta);
959                assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity);
960            }
961        }
962    }
963
964    public void testIterateAllWords() {
965        testIterateAllWords(FormatSpec.VERSION4);
966    }
967
968    private void testIterateAllWords(final int formatVersion) {
969        final long seed = System.currentTimeMillis();
970        final Random random = new Random(seed);
971        final int UNIGRAM_COUNT = 1000;
972        final int BIGRAM_COUNT = 1000;
973        final int codePointSetSize = 20;
974        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
975
976        File dictFile = null;
977        try {
978            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
979        } catch (IOException e) {
980            fail("IOException while writing an initial dictionary : " + e);
981        }
982        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
983                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
984                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
985
986        final WordProperty invalidWordProperty = binaryDictionary.getWordProperty("dummyWord");
987        assertFalse(invalidWordProperty.isValid());
988
989        final ArrayList<String> words = new ArrayList<String>();
990        final HashMap<String, Integer> wordProbabilitiesToCheckLater =
991                new HashMap<String, Integer>();
992        final HashMap<String, HashSet<String>> bigrams = new HashMap<String, HashSet<String>>();
993        final HashMap<Pair<String, String>, Integer> bigramProbabilitiesToCheckLater =
994                new HashMap<Pair<String, String>, Integer>();
995
996        for (int i = 0; i < UNIGRAM_COUNT; i++) {
997            final String word = CodePointUtils.generateWord(random, codePointSet);
998            final int unigramProbability = random.nextInt(0xFF);
999            addUnigramWord(binaryDictionary, word, unigramProbability);
1000            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1001                binaryDictionary.flushWithGC();
1002            }
1003            words.add(word);
1004            wordProbabilitiesToCheckLater.put(word, unigramProbability);
1005        }
1006
1007        for (int i = 0; i < BIGRAM_COUNT; i++) {
1008            final int word0Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1009            final int word1Index = random.nextInt(wordProbabilitiesToCheckLater.size());
1010            if (word0Index == word1Index) {
1011                continue;
1012            }
1013            final String word0 = words.get(word0Index);
1014            final String word1 = words.get(word1Index);
1015            final int bigramProbability = random.nextInt(0xF);
1016            binaryDictionary.addBigramWords(word0, word1, bigramProbability,
1017                    BinaryDictionary.NOT_A_VALID_TIMESTAMP);
1018            if (binaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
1019                binaryDictionary.flushWithGC();
1020            }
1021            if (!bigrams.containsKey(word0)) {
1022                final HashSet<String> bigramWord1s = new HashSet<String>();
1023                bigrams.put(word0, bigramWord1s);
1024            }
1025            bigrams.get(word0).add(word1);
1026            bigramProbabilitiesToCheckLater.put(
1027                    new Pair<String, String>(word0, word1), bigramProbability);
1028        }
1029
1030        final HashSet<String> wordSet = new HashSet<String>(words);
1031        final HashSet<Pair<String, String>> bigramSet =
1032                new HashSet<Pair<String,String>>(bigramProbabilitiesToCheckLater.keySet());
1033        int token = 0;
1034        do {
1035            final BinaryDictionary.GetNextWordPropertyResult result =
1036                    binaryDictionary.getNextWordProperty(token);
1037            final WordProperty wordProperty = result.mWordProperty;
1038            final String word0 = wordProperty.mWord;
1039            assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
1040                    wordProperty.mProbabilityInfo.mProbability);
1041            wordSet.remove(word0);
1042            final HashSet<String> bigramWord1s = bigrams.get(word0);
1043            for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
1044                final String word1 = wordProperty.mBigrams.get(j).mWord;
1045                assertTrue(bigramWord1s.contains(word1));
1046                final int unigramProbability = wordProbabilitiesToCheckLater.get(word1);
1047                final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
1048                final int bigramProbabilityDelta = bigramProbabilitiesToCheckLater.get(bigram);
1049                final int bigramProbablity = binaryDictionary.calculateProbability(
1050                        unigramProbability, bigramProbabilityDelta);
1051                assertEquals(wordProperty.mBigrams.get(j).getProbability(), bigramProbablity);
1052                bigramSet.remove(bigram);
1053            }
1054            token = result.mNextToken;
1055        } while (token != 0);
1056        assertTrue(wordSet.isEmpty());
1057        assertTrue(bigramSet.isEmpty());
1058    }
1059
1060    public void testAddShortcuts() {
1061        testAddShortcuts(FormatSpec.VERSION4);
1062    }
1063
1064    private void testAddShortcuts(final int formatVersion) {
1065        File dictFile = null;
1066        try {
1067            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1068        } catch (IOException e) {
1069            fail("IOException while writing an initial dictionary : " + e);
1070        }
1071        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1072                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1073                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1074
1075        final int unigramProbability = 100;
1076        final int shortcutProbability = 10;
1077        binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
1078                shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
1079                0 /* timestamp */);
1080        WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
1081        assertEquals(1, wordProperty.mShortcutTargets.size());
1082        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1083        assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
1084        final int updatedShortcutProbability = 2;
1085        binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
1086                updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
1087                0 /* timestamp */);
1088        wordProperty = binaryDictionary.getWordProperty("aaa");
1089        assertEquals(1, wordProperty.mShortcutTargets.size());
1090        assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
1091        assertEquals(updatedShortcutProbability,
1092                wordProperty.mShortcutTargets.get(0).getProbability());
1093        binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy",
1094                shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
1095                0 /* timestamp */);
1096        final HashMap<String, Integer> shortcutTargets = new HashMap<String, Integer>();
1097        shortcutTargets.put("zzz", updatedShortcutProbability);
1098        shortcutTargets.put("yyy", shortcutProbability);
1099        wordProperty = binaryDictionary.getWordProperty("aaa");
1100        assertEquals(2, wordProperty.mShortcutTargets.size());
1101        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1102            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1103            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1104                    shortcutTarget.getProbability());
1105            shortcutTargets.remove(shortcutTarget.mWord);
1106        }
1107        shortcutTargets.put("zzz", updatedShortcutProbability);
1108        shortcutTargets.put("yyy", shortcutProbability);
1109        binaryDictionary.flushWithGC();
1110        wordProperty = binaryDictionary.getWordProperty("aaa");
1111        assertEquals(2, wordProperty.mShortcutTargets.size());
1112        for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1113            assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
1114            assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
1115                    shortcutTarget.getProbability());
1116            shortcutTargets.remove(shortcutTarget.mWord);
1117        }
1118    }
1119
1120    public void testAddManyShortcuts() {
1121        testAddManyShortcuts(FormatSpec.VERSION4);
1122    }
1123
1124    private void testAddManyShortcuts(final int formatVersion) {
1125        final long seed = System.currentTimeMillis();
1126        final Random random = new Random(seed);
1127        final int UNIGRAM_COUNT = 1000;
1128        final int SHORTCUT_COUNT = 10000;
1129        final int codePointSetSize = 20;
1130        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
1131
1132        final ArrayList<String> words = new ArrayList<String>();
1133        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
1134        final HashMap<String, HashMap<String, Integer>> shortcutTargets =
1135                new HashMap<String, HashMap<String, Integer>>();
1136
1137        File dictFile = null;
1138        try {
1139            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
1140        } catch (IOException e) {
1141            fail("IOException while writing an initial dictionary : " + e);
1142        }
1143        final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
1144                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
1145                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
1146
1147        for (int i = 0; i < UNIGRAM_COUNT; i++) {
1148            final String word = CodePointUtils.generateWord(random, codePointSet);
1149            final int unigramProbability = random.nextInt(0xFF);
1150            addUnigramWord(binaryDictionary, word, unigramProbability);
1151            words.add(word);
1152            unigramProbabilities.put(word, unigramProbability);
1153            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1154                binaryDictionary.flushWithGC();
1155            }
1156        }
1157        for (int i = 0; i < SHORTCUT_COUNT; i++) {
1158            final String shortcutTarget = CodePointUtils.generateWord(random, codePointSet);
1159            final int shortcutProbability = random.nextInt(0xF);
1160            final String word = words.get(random.nextInt(words.size()));
1161            final int unigramProbability = unigramProbabilities.get(word);
1162            binaryDictionary.addUnigramWord(word, unigramProbability, shortcutTarget,
1163                    shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
1164                    0 /* timestamp */);
1165            if (shortcutTargets.containsKey(word)) {
1166                final HashMap<String, Integer> shortcutTargetsOfWord = shortcutTargets.get(word);
1167                shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1168            } else {
1169                final HashMap<String, Integer> shortcutTargetsOfWord =
1170                        new HashMap<String, Integer>();
1171                shortcutTargetsOfWord.put(shortcutTarget, shortcutProbability);
1172                shortcutTargets.put(word, shortcutTargetsOfWord);
1173            }
1174            if (binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
1175                binaryDictionary.flushWithGC();
1176            }
1177        }
1178
1179        for (final String word : words) {
1180            final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
1181            assertEquals((int)unigramProbabilities.get(word),
1182                    wordProperty.mProbabilityInfo.mProbability);
1183            if (!shortcutTargets.containsKey(word)) {
1184                // The word does not have shortcut targets.
1185                continue;
1186            }
1187            assertEquals(shortcutTargets.get(word).size(), wordProperty.mShortcutTargets.size());
1188            for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
1189                final String targetCodePonts = shortcutTarget.mWord;
1190                assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
1191                        shortcutTarget.getProbability());
1192            }
1193        }
1194    }
1195}
1196