1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.makedict;
18
19import android.test.AndroidTestCase;
20import android.test.MoreAsserts;
21import android.test.suitebuilder.annotation.LargeTest;
22import android.util.Log;
23
24import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
25import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
26import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
27import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
28import com.android.inputmethod.latin.utils.CollectionUtils;
29
30import java.io.File;
31import java.io.IOException;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Random;
35
36@LargeTest
37public class BinaryDictIOUtilsTests extends AndroidTestCase {
38    private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
39    private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
40            new FormatSpec.FormatOptions(3, true);
41
42    private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
43    public static final int DEFAULT_MAX_UNIGRAMS = 1500;
44    private final int mMaxUnigrams;
45
46    private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
47
48    private static final int VERSION3 = 3;
49    private static final int VERSION4 = 4;
50
51    private static final String[] CHARACTERS = {
52        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
53        "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
54        "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters
55        "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji
56        "\uD841\uDE28" /* �� */, "\uD840\uDC0B" /* �� */, "\uD861\uDED7" /* �� */ // surrogate pair
57    };
58
59    public BinaryDictIOUtilsTests() {
60        // 1500 is the default max unigrams
61        this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
62    }
63
64    public BinaryDictIOUtilsTests(final long seed, final int maxUnigrams) {
65        super();
66        Log.d(TAG, "Seed for test is " + seed + ", maxUnigrams is " + maxUnigrams);
67        mMaxUnigrams = maxUnigrams;
68        final Random random = new Random(seed);
69        sWords.clear();
70        for (int i = 0; i < maxUnigrams; ++i) {
71            sWords.add(generateWord(random.nextInt()));
72        }
73    }
74
75    // Utilities for test
76    private String generateWord(final int value) {
77        final int lengthOfChars = CHARACTERS.length;
78        StringBuilder builder = new StringBuilder("");
79        long lvalue = Math.abs((long)value);
80        while (lvalue > 0) {
81            builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
82            lvalue /= lengthOfChars;
83        }
84        if (builder.toString().equals("")) return "a";
85        return builder.toString();
86    }
87
88    private static void printPtNode(final PtNodeInfo info) {
89        Log.d(TAG, "    PtNode at " + info.mOriginalAddress);
90        Log.d(TAG, "        flags = " + info.mFlags);
91        Log.d(TAG, "        parentAddress = " + info.mParentAddress);
92        Log.d(TAG, "        characters = " + new String(info.mCharacters, 0,
93                info.mCharacters.length));
94        if (info.mFrequency != -1) Log.d(TAG, "        frequency = " + info.mFrequency);
95        if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
96            Log.d(TAG, "        children address = no children address");
97        } else {
98            Log.d(TAG, "        children address = " + info.mChildrenAddress);
99        }
100        if (info.mShortcutTargets != null) {
101            for (final WeightedString ws : info.mShortcutTargets) {
102                Log.d(TAG, "        shortcuts = " + ws.mWord);
103            }
104        }
105        if (info.mBigrams != null) {
106            for (final PendingAttribute attr : info.mBigrams) {
107                Log.d(TAG, "        bigram = " + attr.mAddress);
108            }
109        }
110        Log.d(TAG, "    end address = " + info.mEndAddress);
111    }
112
113    private static void printNode(final Ver3DictDecoder dictDecoder,
114            final FormatSpec.FormatOptions formatOptions) {
115        final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
116        Log.d(TAG, "Node at " + dictBuffer.position());
117        final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
118        Log.d(TAG, "    ptNodeCount = " + count);
119        for (int i = 0; i < count; ++i) {
120            final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(),
121                    formatOptions);
122            printPtNode(currentInfo);
123        }
124        if (formatOptions.mSupportsDynamicUpdate) {
125            final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
126            Log.d(TAG, "    forwardLinkAddress = " + forwardLinkAddress);
127        }
128    }
129
130    @SuppressWarnings("unused")
131    private static void printBinaryFile(final Ver3DictDecoder dictDecoder)
132            throws IOException, UnsupportedFormatException {
133        final FileHeader fileHeader = dictDecoder.readHeader();
134        final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
135        while (dictBuffer.position() < dictBuffer.limit()) {
136            printNode(dictDecoder, fileHeader.mFormatOptions);
137        }
138    }
139
140    private int getWordPosition(final File file, final String word) {
141        int position = FormatSpec.NOT_VALID_WORD;
142
143        try {
144            final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file,
145                    DictDecoder.USE_READONLY_BYTEBUFFER);
146            position = dictDecoder.getTerminalPosition(word);
147        } catch (IOException e) {
148        } catch (UnsupportedFormatException e) {
149        }
150        return position;
151    }
152
153    /**
154     * Find a word using the DictDecoder.
155     *
156     * @param dictDecoder the dict decoder
157     * @param word the word searched
158     * @return the found ptNodeInfo
159     * @throws IOException
160     * @throws UnsupportedFormatException
161     */
162    private static PtNodeInfo findWordByBinaryDictReader(final DictDecoder dictDecoder,
163            final String word) throws IOException, UnsupportedFormatException {
164        int position = dictDecoder.getTerminalPosition(word);
165        if (position != FormatSpec.NOT_VALID_WORD) {
166            dictDecoder.setPosition(0);
167            final FileHeader header = dictDecoder.readHeader();
168            dictDecoder.setPosition(position);
169            return dictDecoder.readPtNode(position, header.mFormatOptions);
170        }
171        return null;
172    }
173
174    private PtNodeInfo findWordFromFile(final File file, final String word) {
175        final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
176        PtNodeInfo info = null;
177        try {
178            dictDecoder.openDictBuffer();
179            info = findWordByBinaryDictReader(dictDecoder, word);
180        } catch (IOException e) {
181        } catch (UnsupportedFormatException e) {
182        }
183        return info;
184    }
185
186    // return amount of time to insert a word
187    private long insertAndCheckWord(final File file, final String word, final int frequency,
188            final boolean exist, final ArrayList<WeightedString> bigrams,
189            final ArrayList<WeightedString> shortcuts, final int formatVersion) {
190        long amountOfTime = -1;
191        try {
192            final DictUpdater dictUpdater;
193            if (formatVersion == VERSION3) {
194                dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
195            } else {
196                throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't"
197                        + " exist.");
198            }
199
200            if (!exist) {
201                assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
202            }
203            final long now = System.nanoTime();
204            dictUpdater.insertWord(word, frequency, bigrams, shortcuts, false, false);
205            amountOfTime = System.nanoTime() - now;
206            MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
207        } catch (IOException e) {
208            Log.e(TAG, "Raised an IOException while inserting a word", e);
209        } catch (UnsupportedFormatException e) {
210            Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e);
211        }
212        return amountOfTime;
213    }
214
215    private void deleteWord(final File file, final String word, final int formatVersion) {
216        try {
217            final DictUpdater dictUpdater;
218            if (formatVersion == VERSION3) {
219                dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
220            } else {
221                throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't"
222                        + " exist.");
223            }
224            dictUpdater.deleteWord(word);
225        } catch (IOException e) {
226        } catch (UnsupportedFormatException e) {
227        }
228    }
229
230    private void checkReverseLookup(final File file, final String word, final int position) {
231
232        try {
233            final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
234            final FileHeader fileHeader = dictDecoder.readHeader();
235            assertEquals(word,
236                    BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
237                            position, fileHeader.mFormatOptions).mWord);
238        } catch (IOException e) {
239            Log.e(TAG, "Raised an IOException while looking up a word", e);
240        } catch (UnsupportedFormatException e) {
241            Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e);
242        }
243    }
244
245    private void runTestInsertWord(final int formatVersion) {
246        File file = null;
247        try {
248            file = File.createTempFile("testInsertWord", TEST_DICT_FILE_EXTENSION,
249                    getContext().getCacheDir());
250        } catch (IOException e) {
251            fail("IOException while creating temporary file: " + e);
252        }
253
254        // set an initial dictionary.
255        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
256                new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
257        dict.add("abcd", 10, null, false);
258
259        try {
260            final DictEncoder dictEncoder = new Ver3DictEncoder(file);
261            dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
262        } catch (IOException e) {
263            fail("IOException while writing an initial dictionary : " + e);
264        } catch (UnsupportedFormatException e) {
265            fail("UnsupportedFormatException while writing an initial dictionary : " + e);
266        }
267
268        MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
269        insertAndCheckWord(file, "abcde", 10, false, null, null, formatVersion);
270
271        insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatVersion);
272        checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
273
274        insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatVersion);
275        checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
276
277        // update the existing word.
278        insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatVersion);
279
280        // split 1
281        insertAndCheckWord(file, "ab", 20, false, null, null, formatVersion);
282
283        // split 2
284        insertAndCheckWord(file, "ami", 30, false, null, null, formatVersion);
285
286        deleteWord(file, "ami", formatVersion);
287        assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));
288
289        insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatVersion);
290
291        deleteWord(file, "abcd", formatVersion);
292        assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
293    }
294
295    public void testInsertWord() {
296        runTestInsertWord(VERSION3);
297    }
298
299    private void runTestInsertWordWithBigrams(final int formatVersion) {
300        File file = null;
301        try {
302            file = File.createTempFile("testInsertWordWithBigrams", TEST_DICT_FILE_EXTENSION,
303                    getContext().getCacheDir());
304        } catch (IOException e) {
305            fail("IOException while creating temporary file: " + e);
306        }
307
308        // set an initial dictionary.
309        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
310                new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
311        dict.add("abcd", 10, null, false);
312        dict.add("efgh", 15, null, false);
313
314        try {
315            final DictEncoder dictEncoder = new Ver3DictEncoder(file);
316            dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
317        } catch (IOException e) {
318            fail("IOException while writing an initial dictionary : " + e);
319        } catch (UnsupportedFormatException e) {
320            fail("UnsupportedFormatException while writing an initial dictionary : " + e);
321        }
322
323        final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
324        banana.add(new WeightedString("banana", 10));
325
326        insertAndCheckWord(file, "banana", 0, false, null, null, formatVersion);
327        insertAndCheckWord(file, "recursive", 60, true, banana, null, formatVersion);
328
329        final PtNodeInfo info = findWordFromFile(file, "recursive");
330        int bananaPos = getWordPosition(file, "banana");
331        assertNotNull(info.mBigrams);
332        assertEquals(info.mBigrams.size(), 1);
333        assertEquals(info.mBigrams.get(0).mAddress, bananaPos);
334    }
335
336    public void testInsertWordWithBigrams() {
337        runTestInsertWordWithBigrams(VERSION3);
338    }
339
340    private void runTestRandomWords(final int formatVersion) {
341        File file = null;
342        try {
343            file = File.createTempFile("testRandomWord", TEST_DICT_FILE_EXTENSION,
344                    getContext().getCacheDir());
345        } catch (IOException e) {
346        }
347        assertNotNull(file);
348
349        // set an initial dictionary.
350        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
351                new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
352                        false));
353        dict.add("initial", 10, null, false);
354
355        try {
356            final DictEncoder dictEncoder = new Ver3DictEncoder(file);
357            dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
358        } catch (IOException e) {
359            assertTrue(false);
360        } catch (UnsupportedFormatException e) {
361            assertTrue(false);
362        }
363
364        long maxTimeToInsert = 0, sum = 0;
365        long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert.
366        int cnt = 0;
367        for (final String word : sWords) {
368            final long diff = insertAndCheckWord(file, word,
369                    cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null, formatVersion);
370            maxTimeToInsert = Math.max(maxTimeToInsert, diff);
371            minTimeToInsert = Math.min(minTimeToInsert, diff);
372            sum += diff;
373            cnt++;
374        }
375        cnt = 0;
376        for (final String word : sWords) {
377            MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
378        }
379
380        Log.d(TAG, "Test version " + formatVersion);
381        Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
382        Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
383        Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms.");
384    }
385
386    public void testRandomWords() {
387        runTestRandomWords(VERSION3);
388    }
389}
390