BinaryDictionaryTests.java revision b698e9c1fab9df8e1cd58f997ad62147522538fc
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.test.AndroidTestCase;
20import android.test.suitebuilder.annotation.LargeTest;
21import android.util.Pair;
22
23import com.android.inputmethod.latin.makedict.CodePointUtils;
24import com.android.inputmethod.latin.makedict.FormatSpec;
25
26import java.io.File;
27import java.io.IOException;
28import java.util.ArrayList;
29import java.util.HashMap;
30import java.util.HashSet;
31import java.util.Locale;
32import java.util.Map;
33import java.util.Random;
34
35@LargeTest
36public class BinaryDictionaryTests extends AndroidTestCase {
37    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
38    private static final String TEST_LOCALE = "test";
39
40    @Override
41    protected void setUp() throws Exception {
42        super.setUp();
43    }
44
45    @Override
46    protected void tearDown() throws Exception {
47        super.tearDown();
48    }
49
50    private File createEmptyDictionaryAndGetFile(final String filename) throws IOException {
51        final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION,
52                getContext().getCacheDir());
53        Map<String, String> attributeMap = new HashMap<String, String>();
54        attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
55                FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
56        if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
57                3 /* dictVersion */, attributeMap)) {
58            return file;
59        } else {
60            throw new IOException("Empty dictionary cannot be created.");
61        }
62    }
63
64    public void testIsValidDictionary() {
65        File dictFile = null;
66        try {
67            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
68        } catch (IOException e) {
69            fail("IOException while writing an initial dictionary : " + e);
70        }
71        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
72                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
73                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
74        assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
75                binaryDictionary.isValidDictionary());
76        binaryDictionary.close();
77        assertFalse("binaryDictionary must be invalid after closing.",
78                binaryDictionary.isValidDictionary());
79        dictFile.delete();
80        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
81                dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
82                TEST_LOCALE, true /* isUpdatable */);
83        assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
84                binaryDictionary.isValidDictionary());
85        binaryDictionary.close();
86    }
87
88    public void testAddUnigramWord() {
89        File dictFile = null;
90        try {
91            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
92        } catch (IOException e) {
93            fail("IOException while writing an initial dictionary : " + e);
94        }
95        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
96                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
97                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
98
99        final int probability = 100;
100        binaryDictionary.addUnigramWord("aaa", probability);
101        // Reallocate and create.
102        binaryDictionary.addUnigramWord("aab", probability);
103        // Insert into children.
104        binaryDictionary.addUnigramWord("aac", probability);
105        // Make terminal.
106        binaryDictionary.addUnigramWord("aa", probability);
107        // Create children.
108        binaryDictionary.addUnigramWord("aaaa", probability);
109        // Reallocate and make termianl.
110        binaryDictionary.addUnigramWord("a", probability);
111
112        final int updatedProbability = 200;
113        // Update.
114        binaryDictionary.addUnigramWord("aaa", updatedProbability);
115
116        assertEquals(probability, binaryDictionary.getFrequency("aab"));
117        assertEquals(probability, binaryDictionary.getFrequency("aac"));
118        assertEquals(probability, binaryDictionary.getFrequency("aa"));
119        assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
120        assertEquals(probability, binaryDictionary.getFrequency("a"));
121        assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
122
123        dictFile.delete();
124    }
125
126    public void testRandomlyAddUnigramWord() {
127        final int wordCount = 1000;
128        final int codePointSetSize = 50;
129        final int seed = 123456789;
130
131        File dictFile = null;
132        try {
133            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
134        } catch (IOException e) {
135            fail("IOException while writing an initial dictionary : " + e);
136        }
137        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
138                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
139                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
140
141        final HashMap<String, Integer> probabilityMap = new HashMap<String, Integer>();
142        // Test a word that isn't contained within the dictionary.
143        final Random random = new Random(seed);
144        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
145        for (int i = 0; i < wordCount; ++i) {
146            final String word = CodePointUtils.generateWord(random, codePointSet);
147            probabilityMap.put(word, random.nextInt(0xFF));
148        }
149        for (String word : probabilityMap.keySet()) {
150            binaryDictionary.addUnigramWord(word, probabilityMap.get(word));
151        }
152        for (String word : probabilityMap.keySet()) {
153            assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
154        }
155        dictFile.delete();
156    }
157
158    public void testAddBigramWords() {
159        File dictFile = null;
160        try {
161            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
162        } catch (IOException e) {
163            fail("IOException while writing an initial dictionary : " + e);
164        }
165        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
166                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
167                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
168
169        final int unigramProbability = 100;
170        final int bigramProbability = 10;
171        final int updatedBigramProbability = 15;
172        binaryDictionary.addUnigramWord("aaa", unigramProbability);
173        binaryDictionary.addUnigramWord("abb", unigramProbability);
174        binaryDictionary.addUnigramWord("bcc", unigramProbability);
175        binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
176        binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
177        binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
178        binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
179
180        final int probability = binaryDictionary.calculateProbability(unigramProbability,
181                bigramProbability);
182        assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
183        assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
184        assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
185        assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
186        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
187        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
188        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
189        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
190
191        binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability);
192        final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability,
193                updatedBigramProbability);
194        assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
195
196        assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
197        assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
198        assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
199        assertEquals(Dictionary.NOT_A_PROBABILITY,
200                binaryDictionary.getBigramProbability("bcc", "aaa"));
201        assertEquals(Dictionary.NOT_A_PROBABILITY,
202                binaryDictionary.getBigramProbability("bcc", "bbc"));
203        assertEquals(Dictionary.NOT_A_PROBABILITY,
204                binaryDictionary.getBigramProbability("aaa", "aaa"));
205
206        // Testing bigram link.
207        binaryDictionary.addUnigramWord("abcde", unigramProbability);
208        binaryDictionary.addUnigramWord("fghij", unigramProbability);
209        binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability);
210        binaryDictionary.addUnigramWord("fgh", unigramProbability);
211        binaryDictionary.addUnigramWord("abc", unigramProbability);
212        binaryDictionary.addUnigramWord("f", unigramProbability);
213        assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
214        assertEquals(Dictionary.NOT_A_PROBABILITY,
215                binaryDictionary.getBigramProbability("abcde", "fgh"));
216        binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability);
217        assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij"));
218
219        dictFile.delete();
220    }
221
222    public void testRandomlyAddBigramWords() {
223        final int wordCount = 100;
224        final int bigramCount = 1000;
225        final int codePointSetSize = 50;
226        final int seed = 11111;
227
228        File dictFile = null;
229        try {
230            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
231        } catch (IOException e) {
232            fail("IOException while writing an initial dictionary : " + e);
233        }
234        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
235                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
236                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
237        final ArrayList<String> words = new ArrayList<String>();
238        // Test a word that isn't contained within the dictionary.
239        final Random random = new Random(seed);
240        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
241        final int[] unigramProbabilities = new int[wordCount];
242        for (int i = 0; i < wordCount; ++i) {
243            final String word = CodePointUtils.generateWord(random, codePointSet);
244            words.add(word);
245            final int unigramProbability = random.nextInt(0xFF);
246            unigramProbabilities[i] = unigramProbability;
247            binaryDictionary.addUnigramWord(word, unigramProbability);
248        }
249
250        final int[][] probabilities = new int[wordCount][wordCount];
251
252        for (int i = 0; i < wordCount; ++i) {
253            for (int j = 0; j < wordCount; ++j) {
254                probabilities[i][j] = Dictionary.NOT_A_PROBABILITY;
255            }
256        }
257
258        for (int i = 0; i < bigramCount; i++) {
259            final int word0Index = random.nextInt(wordCount);
260            final int word1Index = random.nextInt(wordCount);
261            final String word0 = words.get(word0Index);
262            final String word1 = words.get(word1Index);
263            final int bigramProbability = random.nextInt(0xF);
264            probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability(
265                    unigramProbabilities[word1Index], bigramProbability);
266            binaryDictionary.addBigramWords(word0, word1, bigramProbability);
267        }
268
269        for (int i = 0; i < words.size(); i++) {
270            for (int j = 0; j < words.size(); j++) {
271                assertEquals(probabilities[i][j],
272                        binaryDictionary.getBigramProbability(words.get(i), words.get(j)));
273            }
274        }
275
276        dictFile.delete();
277    }
278
279    public void testRemoveBigramWords() {
280        File dictFile = null;
281        try {
282            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
283        } catch (IOException e) {
284            fail("IOException while writing an initial dictionary : " + e);
285        }
286        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
287                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
288                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
289        final int unigramProbability = 100;
290        final int bigramProbability = 10;
291        binaryDictionary.addUnigramWord("aaa", unigramProbability);
292        binaryDictionary.addUnigramWord("abb", unigramProbability);
293        binaryDictionary.addUnigramWord("bcc", unigramProbability);
294        binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
295        binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
296        binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
297        binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
298
299        assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
300        assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
301        assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
302        assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
303
304        binaryDictionary.removeBigramWords("aaa", "abb");
305        assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
306        binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
307        assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
308
309
310        binaryDictionary.removeBigramWords("aaa", "bcc");
311        assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
312        binaryDictionary.removeBigramWords("abb", "aaa");
313        assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
314        binaryDictionary.removeBigramWords("abb", "bcc");
315        assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
316
317        binaryDictionary.removeBigramWords("aaa", "abb");
318        // Test remove non-existing bigram operation.
319        binaryDictionary.removeBigramWords("aaa", "abb");
320        binaryDictionary.removeBigramWords("bcc", "aaa");
321
322        dictFile.delete();
323    }
324
325    public void testFlushDictionary() {
326        File dictFile = null;
327        try {
328            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
329        } catch (IOException e) {
330            fail("IOException while writing an initial dictionary : " + e);
331        }
332        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
333                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
334                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
335
336        final int probability = 100;
337        binaryDictionary.addUnigramWord("aaa", probability);
338        binaryDictionary.addUnigramWord("abcd", probability);
339        // Close without flushing.
340        binaryDictionary.close();
341
342        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
343                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
344                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
345
346        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
347        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
348
349        binaryDictionary.addUnigramWord("aaa", probability);
350        binaryDictionary.addUnigramWord("abcd", probability);
351        binaryDictionary.flush();
352        binaryDictionary.close();
353
354        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
355                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
356                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
357
358        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
359        assertEquals(probability, binaryDictionary.getFrequency("abcd"));
360        binaryDictionary.addUnigramWord("bcde", probability);
361        binaryDictionary.flush();
362        binaryDictionary.close();
363
364        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
365                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
366                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
367        assertEquals(probability, binaryDictionary.getFrequency("bcde"));
368        binaryDictionary.close();
369
370        dictFile.delete();
371    }
372
373    public void testFlushWithGCDictionary() {
374        File dictFile = null;
375        try {
376            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
377        } catch (IOException e) {
378            fail("IOException while writing an initial dictionary : " + e);
379        }
380        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
381                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
382                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
383
384        final int unigramProbability = 100;
385        final int bigramProbability = 10;
386        binaryDictionary.addUnigramWord("aaa", unigramProbability);
387        binaryDictionary.addUnigramWord("abb", unigramProbability);
388        binaryDictionary.addUnigramWord("bcc", unigramProbability);
389        binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
390        binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
391        binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
392        binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
393        binaryDictionary.flushWithGC();
394        binaryDictionary.close();
395
396        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
397                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
398                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
399        final int probability = binaryDictionary.calculateProbability(unigramProbability,
400                bigramProbability);
401        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
402        assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
403        assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
404        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
405        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
406        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
407        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
408        assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
409        assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
410        assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
411        binaryDictionary.flushWithGC();
412        binaryDictionary.close();
413
414        dictFile.delete();
415    }
416
417    // TODO: Evaluate performance of GC
418    public void testAddBigramWordsAndFlashWithGC() {
419        final int wordCount = 100;
420        final int bigramCount = 1000;
421        final int codePointSetSize = 30;
422        // TODO: Use various seeds such as a current timestamp to make this test more random.
423        final int seed = 314159265;
424
425        File dictFile = null;
426        try {
427            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
428        } catch (IOException e) {
429            fail("IOException while writing an initial dictionary : " + e);
430        }
431
432        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
433                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
434                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
435        final ArrayList<String> words = new ArrayList<String>();
436        // Test a word that isn't contained within the dictionary.
437        final Random random = new Random(seed);
438        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
439        final int[] unigramProbabilities = new int[wordCount];
440        for (int i = 0; i < wordCount; ++i) {
441            final String word = CodePointUtils.generateWord(random, codePointSet);
442            words.add(word);
443            final int unigramProbability = random.nextInt(0xFF);
444            unigramProbabilities[i] = unigramProbability;
445            binaryDictionary.addUnigramWord(word, unigramProbability);
446        }
447
448        final int[][] probabilities = new int[wordCount][wordCount];
449
450        for (int i = 0; i < wordCount; ++i) {
451            for (int j = 0; j < wordCount; ++j) {
452                probabilities[i][j] = Dictionary.NOT_A_PROBABILITY;
453            }
454        }
455
456        for (int i = 0; i < bigramCount; i++) {
457            final int word0Index = random.nextInt(wordCount);
458            final int word1Index = random.nextInt(wordCount);
459            final String word0 = words.get(word0Index);
460            final String word1 = words.get(word1Index);
461            final int bigramProbability = random.nextInt(0xF);
462            probabilities[word0Index][word1Index] = binaryDictionary.calculateProbability(
463                    unigramProbabilities[word1Index], bigramProbability);
464            binaryDictionary.addBigramWords(word0, word1, bigramProbability);
465        }
466
467        binaryDictionary.flushWithGC();
468        binaryDictionary.close();
469        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
470                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
471                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
472
473        for (int i = 0; i < words.size(); i++) {
474            for (int j = 0; j < words.size(); j++) {
475                assertEquals(probabilities[i][j],
476                        binaryDictionary.getBigramProbability(words.get(i), words.get(j)));
477            }
478        }
479        dictFile.delete();
480    }
481
    /**
     * Runs {@code flashWithGCIterationCount} rounds of pseudo-random dictionary operations
     * (fixed seed). Each round reopens the dictionary, performs
     * {@code operationCountInEachIteration} steps that may each add a unigram, add a bigram
     * and remove a bigram, verifies every tracked unigram and bigram against the expected
     * values kept in local collections, and then calls {@code flushWithGC()} and
     * {@code close()}.
     */
    public void testRandomOperetionsAndFlashWithGC() {
        final int flashWithGCIterationCount = 50;
        final int operationCountInEachIteration = 200;
        final int initialUnigramCount = 100;
        // Per-step chances of performing each kind of operation (compared to nextFloat()).
        final float addUnigramProb = 0.5f;
        final float addBigramProb = 0.8f;
        final float removeBigramProb = 0.2f;
        final int codePointSetSize = 30;
        final int seed = 141421356;

        final Random random = new Random(seed);

        File dictFile = null;
        try {
            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
        } catch (IOException e) {
            fail("IOException while writing an initial dictionary : " + e);
        }

        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
        // Expected state: every word and bigram added so far, plus their latest probabilities.
        final ArrayList<String> words = new ArrayList<String>();
        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
                new HashMap<Pair<String, String>, Integer>();
        // Seed the dictionary with an initial set of unigrams and persist them.
        for (int i = 0; i < initialUnigramCount; ++i) {
            final String word = CodePointUtils.generateWord(random, codePointSet);
            words.add(word);
            final int unigramProbability = random.nextInt(0xFF);
            unigramProbabilities.put(word, unigramProbability);
            binaryDictionary.addUnigramWord(word, unigramProbability);
        }
        binaryDictionary.flushWithGC();
        binaryDictionary.close();

        for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
            // Reopen the file written by the previous flushWithGC().
            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
            for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
                // Add unigram.
                if (random.nextFloat() < addUnigramProb) {
                    final String word = CodePointUtils.generateWord(random, codePointSet);
                    words.add(word);
                    final int unigramProbability = random.nextInt(0xFF);
                    unigramProbabilities.put(word, unigramProbability);
                    binaryDictionary.addUnigramWord(word, unigramProbability);
                }
                // Add bigram.
                if (random.nextFloat() < addBigramProb && words.size() > 2) {
                    final int word0Index = random.nextInt(words.size());
                    // Draw from size - 1 candidates and step over word0Index so that the two
                    // indices always differ.
                    int word1Index = random.nextInt(words.size() - 1);
                    if (word0Index <= word1Index) {
                        word1Index++;
                    }
                    final String word0 = words.get(word0Index);
                    final String word1 = words.get(word1Index);
                    final int bigramProbability = random.nextInt(0xF);
                    final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
                    bigramWords.add(bigram);
                    bigramProbabilities.put(bigram, bigramProbability);
                    binaryDictionary.addBigramWords(word0, word1, bigramProbability);
                }
                // Remove bigram.
                if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
                    final int bigramIndex = random.nextInt(bigramWords.size());
                    final Pair<String, String> bigram = bigramWords.get(bigramIndex);
                    bigramWords.remove(bigramIndex);
                    bigramProbabilities.remove(bigram);
                    binaryDictionary.removeBigramWords(bigram.first, bigram.second);
                }
            }

            // Test whether all unigram operations are correctly handled.
            for (int i = 0; i < words.size(); i++) {
                final String word = words.get(i);
                final int unigramProbability = unigramProbabilities.get(word);
                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
            }
            // Test whether all bigram operations are correctly handled.
            for (int i = 0; i < bigramWords.size(); i++) {
                final Pair<String, String> bigram = bigramWords.get(i);
                final int unigramProbability = unigramProbabilities.get(bigram.second);
                final int probability;
                // A listed pair with no entry in bigramProbabilities is expected to be absent
                // from the dictionary (presumably a pair that was added more than once and
                // then removed through another list entry).
                if (bigramProbabilities.containsKey(bigram)) {
                    final int bigramProbability = bigramProbabilities.get(bigram);
                    probability = binaryDictionary.calculateProbability(unigramProbability,
                            bigramProbability);
                } else {
                    probability = Dictionary.NOT_A_PROBABILITY;
                }
                assertEquals(probability,
                        binaryDictionary.getBigramProbability(bigram.first, bigram.second));
            }
            binaryDictionary.flushWithGC();
            binaryDictionary.close();
        }

        dictFile.delete();
    }
585
586    public void testAddManyUnigramsAndFlushWithGC() {
587        final int flashWithGCIterationCount = 3;
588        final int codePointSetSize = 50;
589        final int seed = 22360679;
590
591        final Random random = new Random(seed);
592
593        File dictFile = null;
594        try {
595            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
596        } catch (IOException e) {
597            fail("IOException while writing an initial dictionary : " + e);
598        }
599
600        final ArrayList<String> words = new ArrayList<String>();
601        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
602        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
603
604        BinaryDictionary binaryDictionary;
605        for (int i = 0; i < flashWithGCIterationCount; i++) {
606            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
607                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
608                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
609            while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
610                final String word = CodePointUtils.generateWord(random, codePointSet);
611                words.add(word);
612                final int unigramProbability = random.nextInt(0xFF);
613                unigramProbabilities.put(word, unigramProbability);
614                binaryDictionary.addUnigramWord(word, unigramProbability);
615            }
616
617            for (int j = 0; j < words.size(); j++) {
618                final String word = words.get(j);
619                final int unigramProbability = unigramProbabilities.get(word);
620                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
621            }
622
623            binaryDictionary.flushWithGC();
624            binaryDictionary.close();
625        }
626
627        dictFile.delete();
628    }
629
630    public void testUnigramAndBigramCount() {
631        final int flashWithGCIterationCount = 10;
632        final int codePointSetSize = 50;
633        final int unigramCountPerIteration = 1000;
634        final int bigramCountPerIteration = 2000;
635        final int seed = 1123581321;
636
637        final Random random = new Random(seed);
638
639        File dictFile = null;
640        try {
641            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
642        } catch (IOException e) {
643            fail("IOException while writing an initial dictionary : " + e);
644        }
645
646        final ArrayList<String> words = new ArrayList<String>();
647        final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>();
648        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
649
650        BinaryDictionary binaryDictionary;
651        for (int i = 0; i < flashWithGCIterationCount; i++) {
652            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
653                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
654                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
655            for (int j = 0; j < unigramCountPerIteration; j++) {
656                final String word = CodePointUtils.generateWord(random, codePointSet);
657                words.add(word);
658                final int unigramProbability = random.nextInt(0xFF);
659                binaryDictionary.addUnigramWord(word, unigramProbability);
660            }
661            for (int j = 0; j < bigramCountPerIteration; j++) {
662                final String word0 = words.get(random.nextInt(words.size()));
663                final String word1 = words.get(random.nextInt(words.size()));
664                bigrams.add(new Pair<String, String>(word0, word1));
665                final int bigramProbability = random.nextInt(0xF);
666                binaryDictionary.addBigramWords(word0, word1, bigramProbability);
667            }
668            assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
669                    binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
670            assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
671                    binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
672            binaryDictionary.flushWithGC();
673            assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
674                    binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
675            assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
676                    binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
677            binaryDictionary.close();
678        }
679
680        dictFile.delete();
681    }
682}
683