FormatSpec.java revision 061d225fb1d110695b396a470d9ae6a9a3331003
181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada/*
281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Copyright (C) 2012 The Android Open Source Project
381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada *
481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Licensed under the Apache License, Version 2.0 (the "License"); you may not
581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * use this file except in compliance with the License. You may obtain a copy of
681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * the License at
781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada *
881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * http://www.apache.org/licenses/LICENSE-2.0
981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada *
1081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Unless required by applicable law or agreed to in writing, software
1181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
1281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
1381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * License for the specific language governing permissions and limitations under
1481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * the License.
1581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */
1681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
1781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanadapackage com.android.inputmethod.latin.makedict;
1881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
1981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanadaimport com.android.inputmethod.latin.Constants;
201a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanadaimport com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
2181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
2281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada/**
2381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Dictionary File Format Specification.
2481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */
2581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanadapublic final class FormatSpec {
2681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
2781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    /*
2881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * Array of Node(FusionDictionary.Node) layout is as follows:
2981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
3081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * g |
3181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * r | the number of groups, 1 or 2 bytes.
3281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * o | 1 byte = bbbbbbbb match
3381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * u |   case 1xxxxxxx => xxxxxxx << 8 + next byte
3481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * p |   otherwise => bbbbbbbb
3581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * c |
3681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * ount
3781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
3881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * g |
3981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * r | sequence of groups,
4081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * o | the layout of each group is described below.
4181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * u |
4281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * ps
4381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
44061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada     * f |
45061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada     * o | IF HAS_LINKEDLIST_NODE (defined in the file header)
46061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada     * r |     forward link address, 3byte
47061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada     * w | the address must be positive.
48061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada     * a |
49061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada     * rdlinkaddress
5081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     */
5181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
5281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    /* Node(CharGroup) layout is as follows:
5381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | addressType                         xx     : mask with MASK_GROUP_ADDRESS_TYPE
5481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *                                 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS
5581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * f |                                     01 = 1 byte      : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE
5681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * l |                                     10 = 2 bytes     : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES
5781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * a |                                     11 = 3 bytes     : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES
5881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * g | has several chars ?         1 bit, 1 = yes, 0 = no   : FLAG_HAS_MULTIPLE_CHARS
5981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * s | has a terminal ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_TERMINAL
6081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | has shortcut targets ?      1 bit, 1 = yes, 0 = no   : FLAG_HAS_SHORTCUT_TARGETS
6181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | has bigrams ?               1 bit, 1 = yes, 0 = no   : FLAG_HAS_BIGRAMS
6281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | is not a word ?             1 bit, 1 = yes, 0 = no   : FLAG_IS_NOT_A_WORD
6381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | is blacklisted ?            1 bit, 1 = yes, 0 = no   : FLAG_IS_BLACKLISTED
6481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
6581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * p |
6681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * a | IF HAS_PARENT_ADDRESS (defined in the file header)
6781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * r |     parent address, 3byte
6881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * e | the address must be negative, so the absolute value of the address is stored.
6981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * n |
7081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * taddress
7181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
7281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * c | IF FLAG_HAS_MULTIPLE_CHARS
7381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * h |   char, char, char, char    n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers
     * a |   end                       1 byte, = 0x1F : GROUP_CHARACTERS_TERMINATOR
7581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * r | ELSE
7681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * s |   char                      1 or 3 bytes
7781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | END
7881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
7981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * f |
8081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * r | IF FLAG_IS_TERMINAL
8181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * e |   frequency                 1 byte
8281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * q |
8381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
     * c | IF 00 = FLAG_GROUP_ADDRESS_TYPE_NOADDRESS == addressType
8581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * h |   // nothing
8681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * i | ELSIF 01 = FLAG_GROUP_ADDRESS_TYPE_ONEBYTE == addressType
8781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * l |   children address, 1 byte
8881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * d | ELSIF 10 = FLAG_GROUP_ADDRESS_TYPE_TWOBYTES == addressType
8981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * r |   children address, 2 bytes
     * e | ELSE // 11 = FLAG_GROUP_ADDRESS_TYPE_THREEBYTES == addressType
9181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * n |   children address, 3 bytes
9281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * A | END
9381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * d
9481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * dress
9581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
9681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS
9781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | shortcut string list
9881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | IF FLAG_IS_TERMINAL && FLAG_HAS_BIGRAMS
9981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *   | bigrams address list
10081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
10181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * Char format is:
10281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * 1 byte = bbbbbbbb match
10381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
10481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
10581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
10681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *       00011111 would be outside unicode.
10781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * else: iso-latin-1 code
10881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * This allows for the whole unicode range to be encoded, including chars outside of
10981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
11081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * characters which should never happen anyway (and still work, but take 3 bytes).
11181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
11281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * bigram address list is:
11381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * <flags> = | hasNext = 1 bit, 1 = yes, 0 = no     : FLAG_ATTRIBUTE_HAS_NEXT
11481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           | addressSign = 1 bit,                 : FLAG_ATTRIBUTE_OFFSET_NEGATIVE
11581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           |                      1 = must take -address, 0 = must take +address
11681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           |                         xx : mask with MASK_ATTRIBUTE_ADDRESS_TYPE
     *           | addressFormat = 2 bits, 00 = unused  : (no constant defined)
11881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           |                         01 = 1 byte  : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE
11981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           |                         10 = 2 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES
12081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           |                         11 = 3 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES
12181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           | 4 bits : frequency         : mask with FLAG_ATTRIBUTE_FREQUENCY
12281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * <address> | IF (01 == FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE == addressFormat)
12381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           |   read 1 byte, add top 4 bits
12481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           | ELSIF (10 == FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES == addressFormat)
12581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           |   read 2 bytes, add top 4 bits
12681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           | ELSE // 11 == FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES == addressFormat
12781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           |   read 3 bytes, add top 4 bits
12881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           | END
12981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *           | if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE) then address = -address
     * if (FLAG_ATTRIBUTE_HAS_NEXT) goto flags
13181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *
13281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * shortcut string list is:
13381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * <byte size> = GROUP_SHORTCUT_LIST_SIZE_SIZE bytes, big-endian: size of the list, in bytes.
13481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * <flags>     = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_ATTRIBUTE_HAS_NEXT
13581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *               | reserved = 3 bits, must be 0
13681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *               | 4 bits : frequency : mask with FLAG_ATTRIBUTE_FREQUENCY
13781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     * <shortcut>  = | string of characters at the char format described above, with the terminator
13881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     *               | used to signal the end of the string.
     * if (FLAG_ATTRIBUTE_HAS_NEXT) goto flags
14081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada     */
14181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
14281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int VERSION_1_MAGIC_NUMBER = 0x78B1;
14381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    public static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE;
14481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int MINIMUM_SUPPORTED_VERSION = 1;
14581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int MAXIMUM_SUPPORTED_VERSION = 3;
14681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int NOT_A_VERSION_NUMBER = -1;
14781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FIRST_VERSION_WITH_HEADER_SIZE = 2;
14881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FIRST_VERSION_WITH_PARENT_ADDRESS = 3;
149061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada    static final int FIRST_VERSION_WITH_LINKEDLIST_NODE = 3;
15081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
15181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    // These options need to be the same numeric values as the one in the native reading code.
15281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1;
15381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int HAS_PARENT_ADDRESS = 0x2;
15481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4;
15581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int CONTAINS_BIGRAMS_FLAG = 0x8;
15681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
15781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    // TODO: Make this value adaptative to content data, store it in the header, and
15881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    // use it in the reading code.
15981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int MAX_WORD_LENGTH = Constants.Dictionary.MAX_WORD_LENGTH;
16081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
16181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int PARENT_ADDRESS_SIZE = 3;
162061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada    static final int FORWARD_LINK_ADDRESS_SIZE = 3;
16381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
16481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int MASK_GROUP_ADDRESS_TYPE = 0xC0;
16581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00;
16681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40;
16781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80;
16881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0;
16981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
17081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_HAS_MULTIPLE_CHARS = 0x20;
17181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
17281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_IS_TERMINAL = 0x10;
17381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08;
17481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_HAS_BIGRAMS = 0x04;
17581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_IS_NOT_A_WORD = 0x02;
17681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_IS_BLACKLISTED = 0x01;
17781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
17881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80;
17981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40;
18081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30;
18181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10;
18281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20;
18381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30;
18481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int FLAG_ATTRIBUTE_FREQUENCY = 0x0F;
18581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
18681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int GROUP_CHARACTERS_TERMINATOR = 0x1F;
18781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
18881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int GROUP_TERMINATOR_SIZE = 1;
18981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int GROUP_FLAGS_SIZE = 1;
19081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int GROUP_FREQUENCY_SIZE = 1;
19181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int GROUP_MAX_ADDRESS_SIZE = 3;
19281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int GROUP_ATTRIBUTE_FLAGS_SIZE = 1;
19381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE = 3;
19481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int GROUP_SHORTCUT_LIST_SIZE_SIZE = 2;
19581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
19681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
19781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int NO_PARENT_ADDRESS = 0;
198061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada    static final int NO_FORWARD_LINK_ADDRESS = 0;
19981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int INVALID_CHARACTER = -1;
20081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
20181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127
20281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767
20381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
20481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int MAX_TERMINAL_FREQUENCY = 255;
20581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    static final int MAX_BIGRAM_FREQUENCY = 15;
20681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada
2071a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada    /**
2081a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada     * Options about file format.
2091a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada     */
2101a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada    public static class FormatOptions {
2111a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        public final int mVersion;
2121a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        public final boolean mHasParentAddress;
213061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada        public final boolean mHasLinkedListNode;
2141a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        public FormatOptions(final int version) {
2151a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada            this(version, false);
2161a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        }
2171a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        public FormatOptions(final int version, final boolean hasParentAddress) {
218061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada            this(version, hasParentAddress, false);
219061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada        }
220061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada        public FormatOptions(final int version, final boolean hasParentAddress,
221061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada                final boolean hasLinkedListNode) {
2221a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada            mVersion = version;
223061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada            if (version < FIRST_VERSION_WITH_PARENT_ADDRESS && hasParentAddress) {
2241a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada                throw new RuntimeException("Parent addresses are only supported with versions "
225061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada                        + FIRST_VERSION_WITH_PARENT_ADDRESS + " and ulterior.");
2261a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada            }
2271a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada            mHasParentAddress = hasParentAddress;
228061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada
229061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada            if (version < FIRST_VERSION_WITH_LINKEDLIST_NODE && hasLinkedListNode) {
230061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada                throw new RuntimeException("Linked list nodes are only supported with versions "
231061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada                        + FIRST_VERSION_WITH_LINKEDLIST_NODE + " and ulterior.");
232061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada            }
233061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada            if (!hasParentAddress && hasLinkedListNode) {
234061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada                throw new RuntimeException("Linked list nodes need parent addresses.");
235061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada            }
236061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada            mHasLinkedListNode = hasLinkedListNode;
2371a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        }
2381a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada    }
2391a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada
2401a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada    /**
2411a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada     * Class representing file header.
2421a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada     */
2431a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada    static final class FileHeader {
2441a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        public final int mHeaderSize;
2451a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        public final DictionaryOptions mDictionaryOptions;
2461a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        public final FormatOptions mFormatOptions;
2471a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
2481a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada                final FormatOptions formatOptions) {
2491a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada            mHeaderSize = headerSize;
2501a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada            mDictionaryOptions = dictionaryOptions;
2511a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada            mFormatOptions = formatOptions;
2521a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada        }
2531a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada    }
2541a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada
25581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    private FormatSpec() {
25681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada        // This utility class is not publicly instantiable.
25781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada    }
25881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada}
259