FormatSpec.java revision 7ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122
181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada/* 281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Copyright (C) 2012 The Android Open Source Project 381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 48aa9963a895f9dd5bb1bc92ab2e4f461e058f87aTadashi G. Takaoka * Licensed under the Apache License, Version 2.0 (the "License"); 58aa9963a895f9dd5bb1bc92ab2e4f461e058f87aTadashi G. Takaoka * you may not use this file except in compliance with the License. 68aa9963a895f9dd5bb1bc92ab2e4f461e058f87aTadashi G. Takaoka * You may obtain a copy of the License at 781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 88aa9963a895f9dd5bb1bc92ab2e4f461e058f87aTadashi G. Takaoka * http://www.apache.org/licenses/LICENSE-2.0 981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 1081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Unless required by applicable law or agreed to in writing, software 118aa9963a895f9dd5bb1bc92ab2e4f461e058f87aTadashi G. Takaoka * distributed under the License is distributed on an "AS IS" BASIS, 128aa9963a895f9dd5bb1bc92ab2e4f461e058f87aTadashi G. Takaoka * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 138aa9963a895f9dd5bb1bc92ab2e4f461e058f87aTadashi G. Takaoka * See the License for the specific language governing permissions and 148aa9963a895f9dd5bb1bc92ab2e4f461e058f87aTadashi G. Takaoka * limitations under the License. 1581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */ 1681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 1781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanadapackage com.android.inputmethod.latin.makedict; 1881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 197d1ae52ded0deca6b2674df0273ec852ad36319fYuichiro Hanadaimport com.android.inputmethod.annotations.UsedForTesting; 2081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanadaimport com.android.inputmethod.latin.Constants; 211a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanadaimport com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; 2281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 2381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada/** 2481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Dictionary File Format Specification. 2581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */ 2681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanadapublic final class FormatSpec { 2781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 2881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada /* 297ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * File header layout is as follows: 307ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * 317ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * v | 327ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * e | MAGIC_NUMBER + version of the file format, 2 bytes. 337ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * r | 347ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * sion 357ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * 367ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * o | 377ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * p | not used 4 bits 387ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * t | has bigrams ? 1 bit, 1 = yes, 0 = no : CONTAINS_BIGRAMS_FLAG 397ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * i | FRENCH_LIGATURE_PROCESSING_FLAG 407ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * o | supports dynamic updates ? 1 bit, 1 = yes, 0 = no : SUPPORTS_DYNAMIC_UPDATE 417ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * n | GERMAN_UMLAUT_PROCESSING_FLAG 427ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * f | 437ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * lags 447ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * 457ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * h | 467ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * e | size of the file header, 4bytes 477ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * a | including the size of the magic number, the option flags and the header size 487ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * d | 497ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * ersize 507ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * 517ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * | attributes list 527ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * 537ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * attributes list is: 547ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * <key> = | string of characters at the char format described below, with the terminator used 557ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * | to signal the end of the string. 567ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * <value> = | string of characters at the char format described below, with the terminator used 577ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * | to signal the end of the string. 587ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * if the size of already read < headersize, goto key. 597ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada * 607ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada */ 617ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada 627ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada /* 6381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Array of Node(FusionDictionary.Node) layout is as follows: 6481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 6581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * g | 6681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | the number of groups, 1 or 2 bytes. 6781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * o | 1 byte = bbbbbbbb match 6881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * u | case 1xxxxxxx => xxxxxxx << 8 + next byte 6981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * p | otherwise => bbbbbbbb 7081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * c | 7181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * ount 7281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 7381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * g | 7481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | sequence of groups, 7581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * o | the layout of each group is described below. 7681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * u | 7781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * ps 7881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 79061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada * f | 808ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * o | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header) 81061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada * r | forward link address, 3byte 828ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * w | 1 byte = bbbbbbbb match 838ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * a | case 1xxxxxxx => -((xxxxxxx << 16) + (next byte << 8) + next byte) 848ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * r | otherwise => (xxxxxxx << 16) + (next byte << 8) + next byte 858ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * d | 868ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * linkaddress 8781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */ 8881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 8981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada /* Node(CharGroup) layout is as follows: 902ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * | IF !SUPPORTS_DYNAMIC_UPDATE 912ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * | addressType xx : mask with MASK_GROUP_ADDRESS_TYPE 922ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * | 2 bits, 00 = no children : FLAG_GROUP_ADDRESS_TYPE_NOADDRESS 932ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * f | 01 = 1 byte : FLAG_GROUP_ADDRESS_TYPE_ONEBYTE 942ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * l | 10 = 2 bytes : FLAG_GROUP_ADDRESS_TYPE_TWOBYTES 952ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES 962ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * g | ELSE 974ad4ff618f5102148d73e3c04d809942bcf16f86Yuichiro Hanada * s | is moved ? 2 bits, 11 = no : FLAG_IS_NOT_MOVED 984ad4ff618f5102148d73e3c04d809942bcf16f86Yuichiro Hanada * | This must be the same as FLAG_GROUP_ADDRESS_TYPE_THREEBYTES 994ad4ff618f5102148d73e3c04d809942bcf16f86Yuichiro Hanada * | 01 = yes : FLAG_IS_MOVED 1002ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * | the new address is stored in the same place as the parent address 101a853356b82e2dc74962243e3143c0ff7a33f3c20Yuichiro Hanada * | is deleted? 10 = yes : FLAG_IS_DELETED 1022ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS 1032ee70804e92b17016a2f042c4f6b0e94b5d23e88Yuichiro Hanada * | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL 10481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS 10581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS 10681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | is not a word ? 1 bit, 1 = yes, 0 = no : FLAG_IS_NOT_A_WORD 10781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | is blacklisted ? 1 bit, 1 = yes, 0 = no : FLAG_IS_BLACKLISTED 10881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 10981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * p | 1108ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * a | IF SUPPORTS_DYNAMIC_UPDATE (defined in the file header) 11181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | parent address, 3byte 1128ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * e | 1 byte = bbbbbbbb match 1138ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * n | case 1xxxxxxx => -((0xxxxxxx << 16) + (next byte << 8) + next byte) 1148ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * t | otherwise => (bbbbbbbb << 16) + (next byte << 8) + next byte 1158ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * a | 1168ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada * ddress 11781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 11881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * c | IF FLAG_HAS_MULTIPLE_CHARS 11981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers 12081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * a | end 1 byte, = 0 12181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | ELSE 12281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * s | char 1 or 3 bytes 12381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | END 12481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 12581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * f | 12681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | IF FLAG_IS_TERMINAL 12781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * e | frequency 1 byte 12881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * q | 12981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 13081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * c | IF 00 = FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = addressType 13181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * h | // nothing 13281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * i | ELSIF 01 = FLAG_GROUP_ADDRESS_TYPE_ONEBYTE == addressType 13381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * l | children address, 1 byte 13481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * d | ELSIF 10 = FLAG_GROUP_ADDRESS_TYPE_TWOBYTES == addressType 13581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * r | children address, 2 bytes 13681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * e | ELSE // 11 = FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = addressType 13781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * n | children address, 3 bytes 13881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * A | END 13981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * d 14081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * dress 14181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 14281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS 14381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | shortcut string list 14481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | IF FLAG_IS_TERMINAL && FLAG_HAS_BIGRAMS 14581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | bigrams address list 14681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 14781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * Char format is: 14881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 1 byte = bbbbbbbb match 14981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte 15081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because 15181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with 15281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 00011111 would be outside unicode. 15381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * else: iso-latin-1 code 15481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * This allows for the whole unicode range to be encoded, including chars outside of 15581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control 15681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * characters which should never happen anyway (and still work, but take 3 bytes). 15781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 15881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * bigram address list is: 15981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <flags> = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_ATTRIBUTE_HAS_NEXT 16081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | addressSign = 1 bit, : FLAG_ATTRIBUTE_OFFSET_NEGATIVE 16181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 1 = must take -address, 0 = must take +address 16281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | xx : mask with MASK_ATTRIBUTE_ADDRESS_TYPE 16381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | addressFormat = 2 bits, 00 = unused : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE 16481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 01 = 1 byte : FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE 16581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 10 = 2 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES 16681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 11 = 3 bytes : FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES 16781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 4 bits : frequency : mask with FLAG_ATTRIBUTE_FREQUENCY 16881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <address> | IF (01 == FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE == addressFormat) 16981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | read 1 byte, add top 4 bits 17081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | ELSIF (10 == FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES == addressFormat) 17181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | read 2 bytes, add top 4 bits 17281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | ELSE // 11 == FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES == addressFormat 17381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | read 3 bytes, add top 4 bits 17481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | END 17581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | if (FLAG_ATTRIBUTE_OFFSET_NEGATIVE) then address = -address 17681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * if (FLAG_ATTRIBUTE_HAS_NEXT) goto bigram_and_shortcut_address_list_is 17781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * 17881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * shortcut string list is: 17981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <byte size> = GROUP_SHORTCUT_LIST_SIZE_SIZE bytes, big-endian: size of the list, in bytes. 18081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <flags> = | hasNext = 1 bit, 1 = yes, 0 = no : FLAG_ATTRIBUTE_HAS_NEXT 18181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | reserved = 3 bits, must be 0 18281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | 4 bits : frequency : mask with FLAG_ATTRIBUTE_FREQUENCY 18381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * <shortcut> = | string of characters at the char format described above, with the terminator 18481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * | used to signal the end of the string. 18581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada * if (FLAG_ATTRIBUTE_HAS_NEXT goto flags 18681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada */ 18781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 1887ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada public static final int MAGIC_NUMBER = 0x9BC13AFE; 1897ec9db2c34ee6bec2cbff6cf05cee9bf3c2f7122Yuichiro Hanada static final int MINIMUM_SUPPORTED_VERSION = 2; 19081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAXIMUM_SUPPORTED_VERSION = 3; 19181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int NOT_A_VERSION_NUMBER = -1; 19282d9deaaf252cd20f8918adbc7a4b9b8f2647c38Yuichiro Hanada static final int FIRST_VERSION_WITH_DYNAMIC_UPDATE = 3; 19381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 19481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada // These options need to be the same numeric values as the one in the native reading code. 19581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; 196b686df15fcc95611c524318359fe9ecb4fd6f74cYuichiro Hanada // TODO: Make the native reading code read this variable. 19782d9deaaf252cd20f8918adbc7a4b9b8f2647c38Yuichiro Hanada static final int SUPPORTS_DYNAMIC_UPDATE = 0x2; 19881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; 19981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int CONTAINS_BIGRAMS_FLAG = 0x8; 20081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 20181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada // TODO: Make this value adaptative to content data, store it in the header, and 20281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada // use it in the reading code. 203ffcbbaf12788a9fc9398607a548e552d7d2bf05eSatoshi Kataoka static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH; 20481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 20581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int PARENT_ADDRESS_SIZE = 3; 206061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada static final int FORWARD_LINK_ADDRESS_SIZE = 3; 20781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 2084ad4ff618f5102148d73e3c04d809942bcf16f86Yuichiro Hanada // These flags are used only in the static dictionary. 20981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MASK_GROUP_ADDRESS_TYPE = 0xC0; 21081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_GROUP_ADDRESS_TYPE_NOADDRESS = 0x00; 21181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_GROUP_ADDRESS_TYPE_ONEBYTE = 0x40; 21281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_GROUP_ADDRESS_TYPE_TWOBYTES = 0x80; 21381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_GROUP_ADDRESS_TYPE_THREEBYTES = 0xC0; 21481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 21581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_HAS_MULTIPLE_CHARS = 0x20; 21681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 21781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_IS_TERMINAL = 0x10; 21881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08; 21981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_HAS_BIGRAMS = 0x04; 22081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_IS_NOT_A_WORD = 0x02; 22181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_IS_BLACKLISTED = 0x01; 2224ad4ff618f5102148d73e3c04d809942bcf16f86Yuichiro Hanada 2234ad4ff618f5102148d73e3c04d809942bcf16f86Yuichiro Hanada // These flags are used only in the dynamic dictionary. 224c3a98ca306d5d6a3dfce3585b73f7431dbf90bfcYuichiro Hanada static final int MASK_MOVE_AND_DELETE_FLAG = 0xC0; 2254ad4ff618f5102148d73e3c04d809942bcf16f86Yuichiro Hanada static final int FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE = 0x40; 2264ad4ff618f5102148d73e3c04d809942bcf16f86Yuichiro Hanada static final int FLAG_IS_MOVED = 0x00 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE; 2274ad4ff618f5102148d73e3c04d809942bcf16f86Yuichiro Hanada static final int FLAG_IS_NOT_MOVED = 0x80 | FIXED_BIT_OF_DYNAMIC_UPDATE_MOVE; 228a853356b82e2dc74962243e3143c0ff7a33f3c20Yuichiro Hanada static final int FLAG_IS_DELETED = 0x80; 22981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 23081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; 23181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; 23281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; 23381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE = 0x10; 23481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; 23581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; 23681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int FLAG_ATTRIBUTE_FREQUENCY = 0x0F; 23781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 23881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_CHARACTERS_TERMINATOR = 0x1F; 23981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 24081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_TERMINATOR_SIZE = 1; 24181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_FLAGS_SIZE = 1; 24281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_FREQUENCY_SIZE = 1; 24381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_MAX_ADDRESS_SIZE = 3; 24481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_ATTRIBUTE_FLAGS_SIZE = 1; 24581d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_ATTRIBUTE_MAX_ADDRESS_SIZE = 3; 24681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int GROUP_SHORTCUT_LIST_SIZE_SIZE = 2; 24781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 24881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE; 24981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int NO_PARENT_ADDRESS = 0; 250061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada static final int NO_FORWARD_LINK_ADDRESS = 0; 25181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int INVALID_CHARACTER = -1; 25281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 25381d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT = 0x7F; // 127 25481d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAX_CHARGROUPS_IN_A_NODE = 0x7FFF; // 32767 2557223cc2ef1d7fd4ad4ab62166114b36ce7313c55Yuichiro Hanada static final int MAX_BIGRAMS_IN_A_GROUP = 10000; 25681d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 25781d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAX_TERMINAL_FREQUENCY = 255; 25881d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada static final int MAX_BIGRAM_FREQUENCY = 15; 25981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada 26047cac57e4593f47e753410e4199e84e458d6de6fJean Chalard public static final int SHORTCUT_WHITELIST_FREQUENCY = 15; 26147cac57e4593f47e753410e4199e84e458d6de6fJean Chalard 262d36245fad292ea660ca49f38a3ec36e07727dda5Yuichiro Hanada // This option needs to be the same numeric value as the one in binary_format.h. 263d36245fad292ea660ca49f38a3ec36e07727dda5Yuichiro Hanada static final int NOT_VALID_WORD = -99; 2648ec0064c49e80945dbe1bb31129eb890478b7e06Yuichiro Hanada static final int SIGNED_CHILDREN_ADDRESS_SIZE = 3; 265d36245fad292ea660ca49f38a3ec36e07727dda5Yuichiro Hanada 2661a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada /** 2671a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada * Options about file format. 2681a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada */ 269a28a05e971cc242b338331a3b78276fa95188d19Tadashi G. Takaoka public static final class FormatOptions { 2701a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public final int mVersion; 27182d9deaaf252cd20f8918adbc7a4b9b8f2647c38Yuichiro Hanada public final boolean mSupportsDynamicUpdate; 2727d1ae52ded0deca6b2674df0273ec852ad36319fYuichiro Hanada @UsedForTesting 2731a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public FormatOptions(final int version) { 2741a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada this(version, false); 2751a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 2767d1ae52ded0deca6b2674df0273ec852ad36319fYuichiro Hanada 2777d1ae52ded0deca6b2674df0273ec852ad36319fYuichiro Hanada @UsedForTesting 27882d9deaaf252cd20f8918adbc7a4b9b8f2647c38Yuichiro Hanada public FormatOptions(final int version, final boolean supportsDynamicUpdate) { 2791a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada mVersion = version; 28082d9deaaf252cd20f8918adbc7a4b9b8f2647c38Yuichiro Hanada if (version < FIRST_VERSION_WITH_DYNAMIC_UPDATE && supportsDynamicUpdate) { 28182d9deaaf252cd20f8918adbc7a4b9b8f2647c38Yuichiro Hanada throw new RuntimeException("Dynamic updates are only supported with versions " 28282d9deaaf252cd20f8918adbc7a4b9b8f2647c38Yuichiro Hanada + FIRST_VERSION_WITH_DYNAMIC_UPDATE + " and ulterior."); 283061d225fb1d110695b396a470d9ae6a9a3331003Yuichiro Hanada } 28482d9deaaf252cd20f8918adbc7a4b9b8f2647c38Yuichiro Hanada mSupportsDynamicUpdate = supportsDynamicUpdate; 2851a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 2861a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 2871a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada 2881a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada /** 2891a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada * Class representing file header. 2901a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada */ 291fbc5e9b33469f989a745003c8e242c3f002ed165Jean Chalard public static final class FileHeader { 2921a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public final int mHeaderSize; 2931a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public final DictionaryOptions mDictionaryOptions; 2941a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public final FormatOptions mFormatOptions; 295af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard private static final String DICTIONARY_VERSION_ATTRIBUTE = "version"; 296af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard private static final String DICTIONARY_LOCALE_ATTRIBUTE = "locale"; 2972521edec09373b2810093462c89221a2aca9e369Jean Chalard private static final String DICTIONARY_ID_ATTRIBUTE = "dictionary"; 298c6799ffeab17d3e0dc54a1718dad9890e5493ae0Jean Chalard private static final String DICTIONARY_DESCRIPTION_ATTRIBUTE = "description"; 2991a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions, 3001a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada final FormatOptions formatOptions) { 3011a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada mHeaderSize = headerSize; 3021a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada mDictionaryOptions = dictionaryOptions; 3031a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada mFormatOptions = formatOptions; 3041a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 305af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard 306af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard // Helper method to get the locale as a String 307af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard public String getLocaleString() { 308af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard return mDictionaryOptions.mAttributes.get(FileHeader.DICTIONARY_LOCALE_ATTRIBUTE); 309af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard } 310af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard 311af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard // Helper method to get the version String 312af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard public String getVersion() { 313af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard return mDictionaryOptions.mAttributes.get(FileHeader.DICTIONARY_VERSION_ATTRIBUTE); 314af4a7e8c4b2a41e9be48965133ab489cc9484764Jean Chalard } 3152521edec09373b2810093462c89221a2aca9e369Jean Chalard 3162521edec09373b2810093462c89221a2aca9e369Jean Chalard // Helper method to get the dictionary ID as a String 3172521edec09373b2810093462c89221a2aca9e369Jean Chalard public String getId() { 3182521edec09373b2810093462c89221a2aca9e369Jean Chalard return mDictionaryOptions.mAttributes.get(FileHeader.DICTIONARY_ID_ATTRIBUTE); 3192521edec09373b2810093462c89221a2aca9e369Jean Chalard } 320c6799ffeab17d3e0dc54a1718dad9890e5493ae0Jean Chalard 321c6799ffeab17d3e0dc54a1718dad9890e5493ae0Jean Chalard // Helper method to get the description 322c6799ffeab17d3e0dc54a1718dad9890e5493ae0Jean Chalard public String getDescription() { 323c6799ffeab17d3e0dc54a1718dad9890e5493ae0Jean Chalard // TODO: Right now each dictionary file comes with a description in its own language. 324c6799ffeab17d3e0dc54a1718dad9890e5493ae0Jean Chalard // It will display as is no matter the device's locale. It should be internationalized. 325c6799ffeab17d3e0dc54a1718dad9890e5493ae0Jean Chalard return mDictionaryOptions.mAttributes.get(FileHeader.DICTIONARY_DESCRIPTION_ATTRIBUTE); 326c6799ffeab17d3e0dc54a1718dad9890e5493ae0Jean Chalard } 3271a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada } 3281a347723c5ad4a71076df67f3af3b702db205719Yuichiro Hanada 32981d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada private FormatSpec() { 33081d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada // This utility class is not publicly instantiable. 33181d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada } 33281d97eec0e77e72cce606f9c9f96091c0b348190Yuichiro Hanada} 333