// FormatSpec.java revision 061d225fb1d110695b396a470d9ae6a9a3331003
181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada/* 281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Copyright (C) 2012 The Android Open Source Project 381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Licensed under the Apache License, Version 2.0 (the "License"); you may not 581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * use this file except in compliance with the License. You may obtain a copy of 681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * the License at 781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * http://www.apache.org/licenses/LICENSE-2.0 981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 1081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Unless required by applicable law or agreed to in writing, software 1181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 1281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 1381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * License for the specific language governing permissions and limitations under 1481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * the License. 
1581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */ 1681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 1781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanadapackage com.android.inputmethod.latin.makedict; 1881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 1981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanadaimport com.android.inputmethod.latin.Constants; 201a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanadaimport com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; 2181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 2281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada/** 2381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Dictionary File Format Specification. 2481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */ 2581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanadapublic final class FormatSpec { 2681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 2781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada /* 2881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Array of Node(FusionDictionary.Node) layout is as follows: 2981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 3081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * g | 3181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | the number of groups, 1 or 2 bytes. 
3281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * o | 1 byte = bbbbbbbb match 3381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * u | case 1xxxxxxx => xxxxxxx << 8 + next byte 3481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * p | otherwise => bbbbbbbb 3581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * c | 3681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * ount 3781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 3881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * g | 3981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | sequence of groups, 4081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * o | the layout of each group is described below. 4181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * u | 4281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * ps 4381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 44061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada * f | 45061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada * o | IF HAS_LINKEDLIST_NODE (defined in the file header) 46061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada * r | forward link address, 3byte 47061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada * w | the address must be positive. 
48061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada * a | 49061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada * rdlinkaddress 5081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */ 5181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 5281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada /* Node(CharGroup) layout is as follows: 5381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE 5481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS 5581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE 5681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES 5781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES 5881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * g | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS 5981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL 6081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS 6181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS 6281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD 6381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | is blacklisted ? 
1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED 6481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 6581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * p | 6681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * a | IF HAS_PARENT_ADDRESS (defined in the file header) 6781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | parent address, 3byte 6881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * e | the address must be negative, so the absolute value of the address is stored. 6981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * n | 7081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * taddress 7181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 7281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * c | IF FLAG_HAS_MULTIPLE_CHARS 7381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers 7481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * a | end 1 byte, = 0 7581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | ELSE 7681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * s | char 1 or 3 bytes 7781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | END 7881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 7981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * f | 8081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | IF FLAG_IS_TERMINAL 8181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * e | frequency 1 byte 8281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * q | 8381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 8481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * c | IF 00 = FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = addressType 8581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * h | // nothing 8681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * i | ELSIF 01 = FLAG_GROUP_ADDRESS_TYPE_ONEBYTE == addressType 
8781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * l | children address, 1 byte 8881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * d | ELSIF 10 = FLAG_GROUP_ADDRESS_TYPE_TWOBYTES == addressType 8981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | children address, 2 bytes 9081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * e | ELSE // 11 = FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = addressType 9181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * n | children address, 3 bytes 9281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * A | END 9381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * d 9481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * dress 9581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 9681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS 9781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | shortcut string list 9881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | IF FLAG_IS_TERMINAL && FLAG_HAS_BIGRAMS 9981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | bigrams address list 10081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 10181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Char format is: 10281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 1 byte = bbbbbbbb match 10381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte 10481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because 10581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with 10681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 00011111 would be outside unicode. 
10781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * else: iso-latin-1 code 10881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * This allows for the whole unicode range to be encoded, including chars outside of 10981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control 11081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * characters which should never happen anyway (and still work, but take 3 bytes). 11181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 11281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * bigram address list is: 11381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <flags> = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_ATTRIBUTE_HAS_NEXT 11481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | addressSign = 1 bit, : FLAG_ATTRIBUTE_OFFSET_NEGATIVE 11581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 1 = must take -address, 0 = must take +address 11681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | xx : mask with MASK_ATTRIBUTE_ADDRESS_TYPE 11781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | addressFormat = 2 bits, 00 = unused : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE 11881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 01 = 1 byte : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE 11981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 10 = 2 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES 12081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 11 = 3 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES 12181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 4 bits : frequency : mask with FLAG_ATTRIBUTE_FREQUENCY 12281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <address> | IF (01 == FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE == addressFormat) 12381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | read 1 byte, add top 4 bits 
12481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | ELSIF (10 == FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES == addressFormat) 12581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | read 2 bytes, add top 4 bits 12681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | ELSE // 11 == FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES == addressFormat 12781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | read 3 bytes, add top 4 bits 12881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | END 12981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE) then address = -address 13081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * if (FLAG_ATTRIBUTE_HAS_NEXT) goto bigram_and_shortcut_address_list_is 13181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 13281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * shortcut string list is: 13381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <byte size> = GROUP_SHORTCUT_LIST_SIZE_SIZE bytes, big-endian: size of the list, in bytes. 13481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <flags> = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_ATTRIBUTE_HAS_NEXT 13581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | reserved = 3 bits, must be 0 13681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 4 bits : frequency : mask with FLAG_ATTRIBUTE_FREQUENCY 13781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <shortcut> = | string of characters at the char format described above, with the terminator 13881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | used to signal the end of the string. 
13981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * if (FLAG_ATTRIBUTE_HAS_NEXT goto flags 14081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */ 14181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 14281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int VERSION_1_MAGIC_NUMBER = 0x78B1; 14381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada public static final int VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; 14481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MINIMUM_SUPPORTED_VERSION = 1; 14581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAXIMUM_SUPPORTED_VERSION = 3; 14681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int NOT_A_VERSION_NUMBER = -1; 14781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FIRST_VERSION_WITH_HEADER_SIZE = 2; 14881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FIRST_VERSION_WITH_PARENT_ADDRESS = 3; 149061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada static final int FIRST_VERSION_WITH_LINKEDLIST_NODE = 3; 15081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 15181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada // These options need to be the same numeric values as the one in the native reading code. 
15281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; 15381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int HAS_PARENT_ADDRESS = 0x2; 15481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; 15581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int CONTAINS_BIGRAMS_FLAG = 0x8; 15681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 15781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada // TODO: Make this value adaptative to content data, store it in the header, and 15881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada // use it in the reading code. 15981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAX_WORD_LENGTH = Constants.Dictionary.MAX_WORD_LENGTH; 16081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 16181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int PARENT_ADDRESS_SIZE = 3; 162061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada static final int FORWARD_LINK_ADDRESS_SIZE = 3; 16381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 16481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MASK_GROUP_ADDRESS_TYPE = 0xC0; 16581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; 16681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; 16781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; 16881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; 16981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 17081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_HAS_MULTIPLE_CHARS = 0x20; 17181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 
17281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_IS_TERMINAL = 0x10; 17381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08; 17481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_HAS_BIGRAMS = 0x04; 17581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_IS_NOT_A_WORD = 0x02; 17681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_IS_BLACKLISTED = 0x01; 17781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 17881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; 17981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; 18081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; 18181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; 18281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; 18381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; 18481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_FREQUENCY = 0x0F; 18581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 18681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_CHARACTERS_TERMINATOR = 0x1F; 18781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 18881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_TERMINATOR_SIZE = 1; 18981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_FLAGS_SIZE = 1; 19081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_FREQUENCY_SIZE = 1; 19181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int 
GROUP_MAX_ADDRESS_SIZE = 3; 19281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_ATTRIBUTE_FLAGS_SIZE = 1; 19381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE = 3; 19481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_SHORTCUT_LIST_SIZE_SIZE = 2; 19581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 19681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; 19781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int NO_PARENT_ADDRESS = 0; 198061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada static final int NO_FORWARD_LINK_ADDRESS = 0; 19981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int INVALID_CHARACTER = -1; 20081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 20181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127 20281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767 20381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 20481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAX_TERMINAL_FREQUENCY = 255; 20581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAX_BIGRAM_FREQUENCY = 15; 20681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 2071a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada /** 2081a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada * Options about file format. 
2091a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada */ 2101a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public static class FormatOptions { 2111a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public final int mVersion; 2121a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public final boolean mHasParentAddress; 213061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada public final boolean mHasLinkedListNode; 2141a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public FormatOptions(final int version) { 2151a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada this(version, false); 2161a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 2171a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public FormatOptions(final int version, final boolean hasParentAddress) { 218061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada this(version, hasParentAddress, false); 219061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada } 220061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada public FormatOptions(final int version, final boolean hasParentAddress, 221061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada final boolean hasLinkedListNode) { 2221a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada mVersion = version; 223061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada if (version < FIRST_VERSION_WITH_PARENT_ADDRESS && hasParentAddress) { 2241a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada throw new RuntimeException("Parent addresses are only supported with versions " 225061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada + FIRST_VERSION_WITH_PARENT_ADDRESS + " and ulterior."); 2261a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 2271a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada mHasParentAddress = hasParentAddress; 228061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada 229061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada if (version < FIRST_VERSION_WITH_LINKEDLIST_NODE && 
hasLinkedListNode) { 230061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada throw new RuntimeException("Linked list nodes are only supported with versions " 231061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada + FIRST_VERSION_WITH_LINKEDLIST_NODE + " and ulterior."); 232061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada } 233061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada if (!hasParentAddress && hasLinkedListNode) { 234061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada throw new RuntimeException("Linked list nodes need parent addresses."); 235061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada } 236061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada mHasLinkedListNode = hasLinkedListNode; 2371a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 2381a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 2391a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada 2401a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada /** 2411a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada * Class representing file header. 
2421a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada */ 2431a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada static final class FileHeader { 2441a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public final int mHeaderSize; 2451a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public final DictionaryOptions mDictionaryOptions; 2461a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public final FormatOptions mFormatOptions; 2471a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions, 2481a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada final FormatOptions formatOptions) { 2491a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada mHeaderSize = headerSize; 2501a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada mDictionaryOptions = dictionaryOptions; 2511a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada mFormatOptions = formatOptions; 2521a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 2531a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 2541a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada 25581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada private FormatSpec() { 25681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada // This utility class is not publicly instantiable. 25781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada } 25881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada} 259