1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.test.AndroidTestCase;
20import android.test.suitebuilder.annotation.LargeTest;
21import android.text.TextUtils;
22import android.util.Pair;
23
24import com.android.inputmethod.latin.makedict.CodePointUtils;
25import com.android.inputmethod.latin.makedict.FormatSpec;
26
27import java.io.File;
28import java.io.IOException;
29import java.util.ArrayList;
30import java.util.HashMap;
31import java.util.HashSet;
32import java.util.Locale;
33import java.util.Map;
34import java.util.Random;
35
36@LargeTest
37public class BinaryDictionaryTests extends AndroidTestCase {
38    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
39    private static final String TEST_LOCALE = "test";
40
41    @Override
42    protected void setUp() throws Exception {
43        super.setUp();
44    }
45
46    @Override
47    protected void tearDown() throws Exception {
48        super.tearDown();
49    }
50
51    private File createEmptyDictionaryAndGetFile(final String filename) throws IOException {
52        final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION,
53                getContext().getCacheDir());
54        Map<String, String> attributeMap = new HashMap<String, String>();
55        attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
56                FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
57        if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
58                3 /* dictVersion */, attributeMap)) {
59            return file;
60        } else {
61            throw new IOException("Empty dictionary cannot be created.");
62        }
63    }
64
65    public void testIsValidDictionary() {
66        File dictFile = null;
67        try {
68            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
69        } catch (IOException e) {
70            fail("IOException while writing an initial dictionary : " + e);
71        }
72        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
73                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
74                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
75        assertTrue("binaryDictionary must be valid for existing valid dictionary file.",
76                binaryDictionary.isValidDictionary());
77        binaryDictionary.close();
78        assertFalse("binaryDictionary must be invalid after closing.",
79                binaryDictionary.isValidDictionary());
80        dictFile.delete();
81        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), 0 /* offset */,
82                dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(),
83                TEST_LOCALE, true /* isUpdatable */);
84        assertFalse("binaryDictionary must be invalid for not existing dictionary file.",
85                binaryDictionary.isValidDictionary());
86        binaryDictionary.close();
87    }
88
89    public void testAddUnigramWord() {
90        File dictFile = null;
91        try {
92            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
93        } catch (IOException e) {
94            fail("IOException while writing an initial dictionary : " + e);
95        }
96        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
97                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
98                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
99
100        final int probability = 100;
101        binaryDictionary.addUnigramWord("aaa", probability);
102        // Reallocate and create.
103        binaryDictionary.addUnigramWord("aab", probability);
104        // Insert into children.
105        binaryDictionary.addUnigramWord("aac", probability);
106        // Make terminal.
107        binaryDictionary.addUnigramWord("aa", probability);
108        // Create children.
109        binaryDictionary.addUnigramWord("aaaa", probability);
110        // Reallocate and make termianl.
111        binaryDictionary.addUnigramWord("a", probability);
112
113        final int updatedProbability = 200;
114        // Update.
115        binaryDictionary.addUnigramWord("aaa", updatedProbability);
116
117        assertEquals(probability, binaryDictionary.getFrequency("aab"));
118        assertEquals(probability, binaryDictionary.getFrequency("aac"));
119        assertEquals(probability, binaryDictionary.getFrequency("aa"));
120        assertEquals(probability, binaryDictionary.getFrequency("aaaa"));
121        assertEquals(probability, binaryDictionary.getFrequency("a"));
122        assertEquals(updatedProbability, binaryDictionary.getFrequency("aaa"));
123
124        dictFile.delete();
125    }
126
127    public void testRandomlyAddUnigramWord() {
128        final int wordCount = 1000;
129        final int codePointSetSize = 50;
130        final long seed = System.currentTimeMillis();
131
132        File dictFile = null;
133        try {
134            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
135        } catch (IOException e) {
136            fail("IOException while writing an initial dictionary : " + e);
137        }
138        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
139                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
140                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
141
142        final HashMap<String, Integer> probabilityMap = new HashMap<String, Integer>();
143        // Test a word that isn't contained within the dictionary.
144        final Random random = new Random(seed);
145        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
146        for (int i = 0; i < wordCount; ++i) {
147            final String word = CodePointUtils.generateWord(random, codePointSet);
148            probabilityMap.put(word, random.nextInt(0xFF));
149        }
150        for (String word : probabilityMap.keySet()) {
151            binaryDictionary.addUnigramWord(word, probabilityMap.get(word));
152        }
153        for (String word : probabilityMap.keySet()) {
154            assertEquals(word, (int)probabilityMap.get(word), binaryDictionary.getFrequency(word));
155        }
156        dictFile.delete();
157    }
158
159    public void testAddBigramWords() {
160        File dictFile = null;
161        try {
162            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
163        } catch (IOException e) {
164            fail("IOException while writing an initial dictionary : " + e);
165        }
166        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
167                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
168                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
169
170        final int unigramProbability = 100;
171        final int bigramProbability = 10;
172        final int updatedBigramProbability = 15;
173        binaryDictionary.addUnigramWord("aaa", unigramProbability);
174        binaryDictionary.addUnigramWord("abb", unigramProbability);
175        binaryDictionary.addUnigramWord("bcc", unigramProbability);
176        binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
177        binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
178        binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
179        binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
180
181        final int probability = binaryDictionary.calculateProbability(unigramProbability,
182                bigramProbability);
183        assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
184        assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
185        assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
186        assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
187        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
188        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
189        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
190        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
191
192        binaryDictionary.addBigramWords("aaa", "abb", updatedBigramProbability);
193        final int updatedProbability = binaryDictionary.calculateProbability(unigramProbability,
194                updatedBigramProbability);
195        assertEquals(updatedProbability, binaryDictionary.getBigramProbability("aaa", "abb"));
196
197        assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
198        assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
199        assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
200        assertEquals(Dictionary.NOT_A_PROBABILITY,
201                binaryDictionary.getBigramProbability("bcc", "aaa"));
202        assertEquals(Dictionary.NOT_A_PROBABILITY,
203                binaryDictionary.getBigramProbability("bcc", "bbc"));
204        assertEquals(Dictionary.NOT_A_PROBABILITY,
205                binaryDictionary.getBigramProbability("aaa", "aaa"));
206
207        // Testing bigram link.
208        binaryDictionary.addUnigramWord("abcde", unigramProbability);
209        binaryDictionary.addUnigramWord("fghij", unigramProbability);
210        binaryDictionary.addBigramWords("abcde", "fghij", bigramProbability);
211        binaryDictionary.addUnigramWord("fgh", unigramProbability);
212        binaryDictionary.addUnigramWord("abc", unigramProbability);
213        binaryDictionary.addUnigramWord("f", unigramProbability);
214        assertEquals(probability, binaryDictionary.getBigramProbability("abcde", "fghij"));
215        assertEquals(Dictionary.NOT_A_PROBABILITY,
216                binaryDictionary.getBigramProbability("abcde", "fgh"));
217        binaryDictionary.addBigramWords("abcde", "fghij", updatedBigramProbability);
218        assertEquals(updatedProbability, binaryDictionary.getBigramProbability("abcde", "fghij"));
219
220        dictFile.delete();
221    }
222
223    public void testRandomlyAddBigramWords() {
224        final int wordCount = 100;
225        final int bigramCount = 1000;
226        final int codePointSetSize = 50;
227        final long seed = System.currentTimeMillis();
228        final Random random = new Random(seed);
229
230        File dictFile = null;
231        try {
232            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
233        } catch (IOException e) {
234            fail("IOException while writing an initial dictionary : " + e);
235        }
236        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
237                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
238                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
239
240        final ArrayList<String> words = new ArrayList<String>();
241        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
242        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
243        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
244        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
245                new HashMap<Pair<String, String>, Integer>();
246
247        for (int i = 0; i < wordCount; ++i) {
248            final String word = CodePointUtils.generateWord(random, codePointSet);
249            words.add(word);
250            final int unigramProbability = random.nextInt(0xFF);
251            unigramProbabilities.put(word, unigramProbability);
252            binaryDictionary.addUnigramWord(word, unigramProbability);
253        }
254
255        for (int i = 0; i < bigramCount; i++) {
256            final String word0 = words.get(random.nextInt(wordCount));
257            final String word1 = words.get(random.nextInt(wordCount));
258            if (TextUtils.equals(word0, word1)) {
259                continue;
260            }
261            final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
262            bigramWords.add(bigram);
263            final int bigramProbability = random.nextInt(0xF);
264            bigramProbabilities.put(bigram, bigramProbability);
265            binaryDictionary.addBigramWords(word0, word1, bigramProbability);
266        }
267
268        for (final Pair<String, String> bigram : bigramWords) {
269            final int unigramProbability = unigramProbabilities.get(bigram.second);
270            final int bigramProbability = bigramProbabilities.get(bigram);
271            final int probability = binaryDictionary.calculateProbability(unigramProbability,
272                    bigramProbability);
273            assertEquals(probability,
274                    binaryDictionary.getBigramProbability(bigram.first, bigram.second));
275        }
276
277        dictFile.delete();
278    }
279
280    public void testRemoveBigramWords() {
281        File dictFile = null;
282        try {
283            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
284        } catch (IOException e) {
285            fail("IOException while writing an initial dictionary : " + e);
286        }
287        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
288                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
289                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
290        final int unigramProbability = 100;
291        final int bigramProbability = 10;
292        binaryDictionary.addUnigramWord("aaa", unigramProbability);
293        binaryDictionary.addUnigramWord("abb", unigramProbability);
294        binaryDictionary.addUnigramWord("bcc", unigramProbability);
295        binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
296        binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
297        binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
298        binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
299
300        assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
301        assertEquals(true, binaryDictionary.isValidBigram("aaa", "bcc"));
302        assertEquals(true, binaryDictionary.isValidBigram("abb", "aaa"));
303        assertEquals(true, binaryDictionary.isValidBigram("abb", "bcc"));
304
305        binaryDictionary.removeBigramWords("aaa", "abb");
306        assertEquals(false, binaryDictionary.isValidBigram("aaa", "abb"));
307        binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
308        assertEquals(true, binaryDictionary.isValidBigram("aaa", "abb"));
309
310
311        binaryDictionary.removeBigramWords("aaa", "bcc");
312        assertEquals(false, binaryDictionary.isValidBigram("aaa", "bcc"));
313        binaryDictionary.removeBigramWords("abb", "aaa");
314        assertEquals(false, binaryDictionary.isValidBigram("abb", "aaa"));
315        binaryDictionary.removeBigramWords("abb", "bcc");
316        assertEquals(false, binaryDictionary.isValidBigram("abb", "bcc"));
317
318        binaryDictionary.removeBigramWords("aaa", "abb");
319        // Test remove non-existing bigram operation.
320        binaryDictionary.removeBigramWords("aaa", "abb");
321        binaryDictionary.removeBigramWords("bcc", "aaa");
322
323        dictFile.delete();
324    }
325
326    public void testFlushDictionary() {
327        File dictFile = null;
328        try {
329            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
330        } catch (IOException e) {
331            fail("IOException while writing an initial dictionary : " + e);
332        }
333        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
334                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
335                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
336
337        final int probability = 100;
338        binaryDictionary.addUnigramWord("aaa", probability);
339        binaryDictionary.addUnigramWord("abcd", probability);
340        // Close without flushing.
341        binaryDictionary.close();
342
343        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
344                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
345                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
346
347        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
348        assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
349
350        binaryDictionary.addUnigramWord("aaa", probability);
351        binaryDictionary.addUnigramWord("abcd", probability);
352        binaryDictionary.flush();
353        binaryDictionary.close();
354
355        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
356                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
357                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
358
359        assertEquals(probability, binaryDictionary.getFrequency("aaa"));
360        assertEquals(probability, binaryDictionary.getFrequency("abcd"));
361        binaryDictionary.addUnigramWord("bcde", probability);
362        binaryDictionary.flush();
363        binaryDictionary.close();
364
365        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
366                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
367                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
368        assertEquals(probability, binaryDictionary.getFrequency("bcde"));
369        binaryDictionary.close();
370
371        dictFile.delete();
372    }
373
374    public void testFlushWithGCDictionary() {
375        File dictFile = null;
376        try {
377            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
378        } catch (IOException e) {
379            fail("IOException while writing an initial dictionary : " + e);
380        }
381        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
382                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
383                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
384
385        final int unigramProbability = 100;
386        final int bigramProbability = 10;
387        binaryDictionary.addUnigramWord("aaa", unigramProbability);
388        binaryDictionary.addUnigramWord("abb", unigramProbability);
389        binaryDictionary.addUnigramWord("bcc", unigramProbability);
390        binaryDictionary.addBigramWords("aaa", "abb", bigramProbability);
391        binaryDictionary.addBigramWords("aaa", "bcc", bigramProbability);
392        binaryDictionary.addBigramWords("abb", "aaa", bigramProbability);
393        binaryDictionary.addBigramWords("abb", "bcc", bigramProbability);
394        binaryDictionary.flushWithGC();
395        binaryDictionary.close();
396
397        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
398                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
399                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
400        final int probability = binaryDictionary.calculateProbability(unigramProbability,
401                bigramProbability);
402        assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa"));
403        assertEquals(unigramProbability, binaryDictionary.getFrequency("abb"));
404        assertEquals(unigramProbability, binaryDictionary.getFrequency("bcc"));
405        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "abb"));
406        assertEquals(probability, binaryDictionary.getBigramProbability("aaa", "bcc"));
407        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "aaa"));
408        assertEquals(probability, binaryDictionary.getBigramProbability("abb", "bcc"));
409        assertEquals(false, binaryDictionary.isValidBigram("bcc", "aaa"));
410        assertEquals(false, binaryDictionary.isValidBigram("bcc", "bbc"));
411        assertEquals(false, binaryDictionary.isValidBigram("aaa", "aaa"));
412        binaryDictionary.flushWithGC();
413        binaryDictionary.close();
414
415        dictFile.delete();
416    }
417
418    // TODO: Evaluate performance of GC
419    public void testAddBigramWordsAndFlashWithGC() {
420        final int wordCount = 100;
421        final int bigramCount = 1000;
422        final int codePointSetSize = 30;
423        final long seed = System.currentTimeMillis();
424        final Random random = new Random(seed);
425
426        File dictFile = null;
427        try {
428            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
429        } catch (IOException e) {
430            fail("IOException while writing an initial dictionary : " + e);
431        }
432
433        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
434                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
435                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
436
437        final ArrayList<String> words = new ArrayList<String>();
438        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
439        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
440        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
441        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
442                new HashMap<Pair<String, String>, Integer>();
443
444        for (int i = 0; i < wordCount; ++i) {
445            final String word = CodePointUtils.generateWord(random, codePointSet);
446            words.add(word);
447            final int unigramProbability = random.nextInt(0xFF);
448            unigramProbabilities.put(word, unigramProbability);
449            binaryDictionary.addUnigramWord(word, unigramProbability);
450        }
451
452        for (int i = 0; i < bigramCount; i++) {
453            final String word0 = words.get(random.nextInt(wordCount));
454            final String word1 = words.get(random.nextInt(wordCount));
455            if (TextUtils.equals(word0, word1)) {
456                continue;
457            }
458            final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
459            bigramWords.add(bigram);
460            final int bigramProbability = random.nextInt(0xF);
461            bigramProbabilities.put(bigram, bigramProbability);
462            binaryDictionary.addBigramWords(word0, word1, bigramProbability);
463        }
464
465        binaryDictionary.flushWithGC();
466        binaryDictionary.close();
467        binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
468                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
469                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
470
471        for (final Pair<String, String> bigram : bigramWords) {
472            final int unigramProbability = unigramProbabilities.get(bigram.second);
473            final int bigramProbability = bigramProbabilities.get(bigram);
474            final int probability = binaryDictionary.calculateProbability(unigramProbability,
475                    bigramProbability);
476            assertEquals(probability,
477                    binaryDictionary.getBigramProbability(bigram.first, bigram.second));
478        }
479
480        dictFile.delete();
481    }
482
483    public void testRandomOperetionsAndFlashWithGC() {
484        final int flashWithGCIterationCount = 50;
485        final int operationCountInEachIteration = 200;
486        final int initialUnigramCount = 100;
487        final float addUnigramProb = 0.5f;
488        final float addBigramProb = 0.8f;
489        final float removeBigramProb = 0.2f;
490        final int codePointSetSize = 30;
491
492        final long seed = System.currentTimeMillis();
493        final Random random = new Random(seed);
494
495        File dictFile = null;
496        try {
497            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
498        } catch (IOException e) {
499            fail("IOException while writing an initial dictionary : " + e);
500        }
501
502        BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
503                0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
504                Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
505        final ArrayList<String> words = new ArrayList<String>();
506        final ArrayList<Pair<String, String>> bigramWords = new ArrayList<Pair<String,String>>();
507        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
508        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
509        final HashMap<Pair<String, String>, Integer> bigramProbabilities =
510                new HashMap<Pair<String, String>, Integer>();
511        for (int i = 0; i < initialUnigramCount; ++i) {
512            final String word = CodePointUtils.generateWord(random, codePointSet);
513            words.add(word);
514            final int unigramProbability = random.nextInt(0xFF);
515            unigramProbabilities.put(word, unigramProbability);
516            binaryDictionary.addUnigramWord(word, unigramProbability);
517        }
518        binaryDictionary.flushWithGC();
519        binaryDictionary.close();
520
521        for (int gcCount = 0; gcCount < flashWithGCIterationCount; gcCount++) {
522            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
523                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
524                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
525            for (int opCount = 0; opCount < operationCountInEachIteration; opCount++) {
526                // Add unigram.
527                if (random.nextFloat() < addUnigramProb) {
528                    final String word = CodePointUtils.generateWord(random, codePointSet);
529                    words.add(word);
530                    final int unigramProbability = random.nextInt(0xFF);
531                    unigramProbabilities.put(word, unigramProbability);
532                    binaryDictionary.addUnigramWord(word, unigramProbability);
533                }
534                // Add bigram.
535                if (random.nextFloat() < addBigramProb && words.size() > 2) {
536                    final int word0Index = random.nextInt(words.size());
537                    int word1Index = random.nextInt(words.size() - 1);
538                    if (word0Index <= word1Index) {
539                        word1Index++;
540                    }
541                    final String word0 = words.get(word0Index);
542                    final String word1 = words.get(word1Index);
543                    if (TextUtils.equals(word0, word1)) {
544                        continue;
545                    }
546                    final int bigramProbability = random.nextInt(0xF);
547                    final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
548                    bigramWords.add(bigram);
549                    bigramProbabilities.put(bigram, bigramProbability);
550                    binaryDictionary.addBigramWords(word0, word1, bigramProbability);
551                }
552                // Remove bigram.
553                if (random.nextFloat() < removeBigramProb && !bigramWords.isEmpty()) {
554                    final int bigramIndex = random.nextInt(bigramWords.size());
555                    final Pair<String, String> bigram = bigramWords.get(bigramIndex);
556                    bigramWords.remove(bigramIndex);
557                    bigramProbabilities.remove(bigram);
558                    binaryDictionary.removeBigramWords(bigram.first, bigram.second);
559                }
560            }
561
562            // Test whether the all unigram operations are collectlly handled.
563            for (int i = 0; i < words.size(); i++) {
564                final String word = words.get(i);
565                final int unigramProbability = unigramProbabilities.get(word);
566                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
567            }
568            // Test whether the all bigram operations are collectlly handled.
569            for (int i = 0; i < bigramWords.size(); i++) {
570                final Pair<String, String> bigram = bigramWords.get(i);
571                final int unigramProbability = unigramProbabilities.get(bigram.second);
572                final int probability;
573                if (bigramProbabilities.containsKey(bigram)) {
574                    final int bigramProbability = bigramProbabilities.get(bigram);
575                    probability = binaryDictionary.calculateProbability(unigramProbability,
576                            bigramProbability);
577                } else {
578                    probability = Dictionary.NOT_A_PROBABILITY;
579                }
580                assertEquals(probability,
581                        binaryDictionary.getBigramProbability(bigram.first, bigram.second));
582            }
583            binaryDictionary.flushWithGC();
584            binaryDictionary.close();
585        }
586
587        dictFile.delete();
588    }
589
590    public void testAddManyUnigramsAndFlushWithGC() {
591        final int flashWithGCIterationCount = 3;
592        final int codePointSetSize = 50;
593
594        final long seed = System.currentTimeMillis();
595        final Random random = new Random(seed);
596
597        File dictFile = null;
598        try {
599            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
600        } catch (IOException e) {
601            fail("IOException while writing an initial dictionary : " + e);
602        }
603
604        final ArrayList<String> words = new ArrayList<String>();
605        final HashMap<String, Integer> unigramProbabilities = new HashMap<String, Integer>();
606        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
607
608        BinaryDictionary binaryDictionary;
609        for (int i = 0; i < flashWithGCIterationCount; i++) {
610            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
611                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
612                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
613            while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
614                final String word = CodePointUtils.generateWord(random, codePointSet);
615                words.add(word);
616                final int unigramProbability = random.nextInt(0xFF);
617                unigramProbabilities.put(word, unigramProbability);
618                binaryDictionary.addUnigramWord(word, unigramProbability);
619            }
620
621            for (int j = 0; j < words.size(); j++) {
622                final String word = words.get(j);
623                final int unigramProbability = unigramProbabilities.get(word);
624                assertEquals(word, unigramProbability, binaryDictionary.getFrequency(word));
625            }
626
627            binaryDictionary.flushWithGC();
628            binaryDictionary.close();
629        }
630
631        dictFile.delete();
632    }
633
634    public void testUnigramAndBigramCount() {
635        final int flashWithGCIterationCount = 10;
636        final int codePointSetSize = 50;
637        final int unigramCountPerIteration = 1000;
638        final int bigramCountPerIteration = 2000;
639        final long seed = System.currentTimeMillis();
640        final Random random = new Random(seed);
641
642        File dictFile = null;
643        try {
644            dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
645        } catch (IOException e) {
646            fail("IOException while writing an initial dictionary : " + e);
647        }
648
649        final ArrayList<String> words = new ArrayList<String>();
650        final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>();
651        final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
652
653        BinaryDictionary binaryDictionary;
654        for (int i = 0; i < flashWithGCIterationCount; i++) {
655            binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
656                    0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
657                    Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
658            for (int j = 0; j < unigramCountPerIteration; j++) {
659                final String word = CodePointUtils.generateWord(random, codePointSet);
660                words.add(word);
661                final int unigramProbability = random.nextInt(0xFF);
662                binaryDictionary.addUnigramWord(word, unigramProbability);
663            }
664            for (int j = 0; j < bigramCountPerIteration; j++) {
665                final String word0 = words.get(random.nextInt(words.size()));
666                final String word1 = words.get(random.nextInt(words.size()));
667                if (TextUtils.equals(word0, word1)) {
668                    continue;
669                }
670                bigrams.add(new Pair<String, String>(word0, word1));
671                final int bigramProbability = random.nextInt(0xF);
672                binaryDictionary.addBigramWords(word0, word1, bigramProbability);
673            }
674            assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
675                    binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
676            assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
677                    binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
678            binaryDictionary.flushWithGC();
679            assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
680                    binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
681            assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
682                    binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
683            binaryDictionary.close();
684        }
685
686        dictFile.delete();
687    }
688}
689