ver4_dict_constants.cpp revision 851e0458fe460526b1f953e39a1e406a21ab4647
/*
 * Copyright (C) 2013, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
18
19namespace latinime {
20
21const char *const Ver4DictConstants::BODY_FILE_EXTENSION = ".body";
22const char *const Ver4DictConstants::HEADER_FILE_EXTENSION = ".header";
23
24// Version 4 dictionary size is implicitly limited to 8MB due to 3-byte offsets.
25const int Ver4DictConstants::MAX_DICTIONARY_SIZE = 8 * 1024 * 1024;
26// Extended region size, which is not GCed region size in dict file + additional buffer size, is
27// limited to 1MB to prevent from inefficient traversing.
28const int Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE = 1 * 1024 * 1024;
29
30// NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT for Trie and TerminalAddressLookupTable.
31// NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT for language model.
32// NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT for bigram and shortcut.
33const size_t Ver4DictConstants::NUM_OF_CONTENT_BUFFERS_IN_BODY_FILE =
34        NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT * 2
35                + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT
36                + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT * 2;
37const int Ver4DictConstants::TRIE_BUFFER_INDEX = 0;
38const int Ver4DictConstants::TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX =
39        TRIE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
40const int Ver4DictConstants::LANGUAGE_MODEL_BUFFER_INDEX =
41        TERMINAL_ADDRESS_LOOKUP_TABLE_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT;
42const int Ver4DictConstants::BIGRAM_BUFFERS_INDEX =
43        LANGUAGE_MODEL_BUFFER_INDEX + NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT;
44const int Ver4DictConstants::SHORTCUT_BUFFERS_INDEX =
45        BIGRAM_BUFFERS_INDEX + NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT;
46
47const int Ver4DictConstants::NOT_A_TERMINAL_ID = -1;
48const int Ver4DictConstants::PROBABILITY_SIZE = 1;
49const int Ver4DictConstants::FLAGS_IN_PROBABILITY_FILE_SIZE = 1;
50const int Ver4DictConstants::TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
51const int Ver4DictConstants::NOT_A_TERMINAL_ADDRESS = 0;
52const int Ver4DictConstants::TERMINAL_ID_FIELD_SIZE = 4;
53const int Ver4DictConstants::TIME_STAMP_FIELD_SIZE = 4;
54const int Ver4DictConstants::WORD_LEVEL_FIELD_SIZE = 1;
55const int Ver4DictConstants::WORD_COUNT_FIELD_SIZE = 1;
56
57const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 16;
58const int Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE = 4;
59const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE = 64;
60const int Ver4DictConstants::SHORTCUT_ADDRESS_TABLE_DATA_SIZE = 4;
61
62const int Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE = 3;
63// Unsigned int max value of BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE-byte is used for representing
64// invalid terminal ID in bigram lists.
65const int Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID =
66        (1 << (BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE * 8)) - 1;
67const int Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE = 1;
68const int Ver4DictConstants::BIGRAM_PROBABILITY_MASK = 0x0F;
69const int Ver4DictConstants::BIGRAM_IS_LINK_MASK = 0x80;
70const int Ver4DictConstants::BIGRAM_LARGE_PROBABILITY_FIELD_SIZE = 1;
71
72const int Ver4DictConstants::SHORTCUT_FLAGS_FIELD_SIZE = 1;
73const int Ver4DictConstants::SHORTCUT_PROBABILITY_MASK = 0x0F;
74const int Ver4DictConstants::SHORTCUT_HAS_NEXT_MASK = 0x80;
75
76const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SINGLE_DICT_CONTENT = 1;
77const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_SPARSE_TABLE_DICT_CONTENT = 3;
78const size_t Ver4DictConstants::NUM_OF_BUFFERS_FOR_LANGUAGE_MODEL_DICT_CONTENT = 1;
79
80} // namespace latinime
81