12fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa/* 22fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * Copyright (C) 2013, The Android Open Source Project 32fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * 42fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * Licensed under the Apache License, Version 2.0 (the "License"); 52fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * you may not use this file except in compliance with the License. 62fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * You may obtain a copy of the License at 72fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * 82fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * http://www.apache.org/licenses/LICENSE-2.0 92fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * 102fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * Unless required by applicable law or agreed to in writing, software 112fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * distributed under the License is distributed on an "AS IS" BASIS, 122fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 132fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * See the License for the specific language governing permissions and 142fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa * limitations under the License. 152fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa */ 162fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 1788bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/v4/ver4_dict_constants.h" 182fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 192fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasanamespace latinime { 202fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 212ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagiconst char *const Ver4DictConstants::BODY_FILE_EXTENSION = ".body"; 222fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header"; 232fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 242fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets. 25a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalardconst int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024; 262fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa// Extended region size, which is not GCed region size in dict file + additional buffer size, is 272fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa// limited to 1MB to prevent from inefficient traversing. 282fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024; 292fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 30851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable. 31851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model. 32fe395232d69df0887863c1cbabe63def2586d29eKeisuke Kuroyanagi// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for shortcut. 332ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagiconst size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE = 34851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2 35c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagi + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT 36fe395232d69df0887863c1cbabe63def2586d29eKeisuke Kuroyanagi + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT; 372ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagiconst int Ver4DictConstants::TRIE_BUFFER_INDEX = 0; 382ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagiconst int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX = 392ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagi TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; 40c4696b2eb6b25eea4d5c869683104ab99aec0421Keisuke Kuroyanagiconst int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX = 41851e0458fe460526b1f953e39a1e406a21ab4647Keisuke Kuroyanagi TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT; 422ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagiconst int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX = 43fe395232d69df0887863c1cbabe63def2586d29eKeisuke Kuroyanagi LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT; 442ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagi 452fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::NOT_A_TERMINAL_ID = -1; 462fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::PROBABILITY_SIZE = 1; 479a23f0fba25137760a60e9bfaf6bf20a5889648cKeisuke Kuroyanagiconst int Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE = 1; 482fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3; 492fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0; 502fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4; 512fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4; 522383575d2d695efcca093e69ed2daa88aec58862Keisuke Kuroyanagiconst int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 0; 532383575d2d695efcca093e69ed2daa88aec58862Keisuke Kuroyanagiconst int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 2; 542fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 55623067a183caf62fbe33223675430a246b5ae13dKeisuke Kuroyanagiconst uint8_t Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE = 0x1; 564926b90ec530ba1e247b7a0f6edd719b2b01870bKeisuke Kuroyanagiconst uint8_t Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY = 0x2; 577d911d6f91af56586fbca40672bfb77b494ee871Keisuke Kuroyanagiconst uint8_t Ver4DictConstants::FLAG_NOT_A_WORD = 0x4; 587d911d6f91af56586fbca40672bfb77b494ee871Keisuke Kuroyanagiconst uint8_t Ver4DictConstants::FLAG_BLACKLISTED = 0x8; 597d911d6f91af56586fbca40672bfb77b494ee871Keisuke Kuroyanagiconst uint8_t Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE = 0x10; 60623067a183caf62fbe33223675430a246b5ae13dKeisuke Kuroyanagi 61c1163c85187bf4deddb54c64f05e2f5ec97e08b4Keisuke Kuroyanagiconst int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64; 622fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4; 632fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 642fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1; 652fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F; 662fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasaconst int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80; 672fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa 682ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagiconst size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1; 692ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagiconst size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3; 706b0561f9d26215209e8e8895f5c35982af5158f0Keisuke Kuroyanagiconst size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 2; 712ac934296c0571ea252f3fb5a23fba29eb89c666Keisuke Kuroyanagi 722fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa} // namespace latinime 73