1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "dictionary/header/header_policy.h"
18
19#include <algorithm>
20
21#include "utils/ngram_utils.h"
22
23namespace latinime {
24
25// Note that these are corresponding definitions in Java side in DictionaryHeader.
26const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
27const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
28        "REQUIRES_GERMAN_UMLAUT_PROCESSING";
29// TODO: Change attribute string to "IS_DECAYING_DICT".
30const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
31const char *const HeaderPolicy::DATE_KEY = "date";
32const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
33const char *const HeaderPolicy::NGRAM_COUNT_KEYS[] =
34        {"UNIGRAM_COUNT", "BIGRAM_COUNT", "TRIGRAM_COUNT", "QUADGRAM_COUNT"};
35const char *const HeaderPolicy::MAX_NGRAM_COUNT_KEYS[] =
36        {"MAX_UNIGRAM_ENTRY_COUNT", "MAX_BIGRAM_ENTRY_COUNT", "MAX_TRIGRAM_ENTRY_COUNT",
37                "MAX_QUADGRAM_ENTRY_COUNT"};
38const int HeaderPolicy::DEFAULT_MAX_NGRAM_COUNTS[] = {10000, 30000, 30000, 30000};
39const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
40// Historical info is information that is needed to support decaying such as timestamp, level and
41// count.
42const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
43const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
44const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
45        "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
46
47const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
48const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
49const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3;
50
51// Used for logging. Question mark is used to indicate that the key is not found.
52void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
53        int outValueSize) const {
54    if (outValueSize <= 0) return;
55    if (outValueSize == 1) {
56        outValue[0] = '\0';
57        return;
58    }
59    std::vector<int> keyCodePointVector;
60    HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector);
61    DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it =
62            mAttributeMap.find(keyCodePointVector);
63    if (it == mAttributeMap.end()) {
64        // The key was not found.
65        outValue[0] = '?';
66        outValue[1] = '\0';
67        return;
68    }
69    const int terminalIndex = std::min(static_cast<int>(it->second.size()), outValueSize - 1);
70    for (int i = 0; i < terminalIndex; ++i) {
71        outValue[i] = it->second[i];
72    }
73    outValue[terminalIndex] = '\0';
74}
75
76const std::vector<int> HeaderPolicy::readLocale() const {
77    return HeaderReadWriteUtils::readCodePointVectorAttributeValue(&mAttributeMap, LOCALE_KEY);
78}
79
80float HeaderPolicy::readMultipleWordCostMultiplier() const {
81    const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
82            MULTIPLE_WORDS_DEMOTION_RATE_KEY, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
83    if (demotionRate <= 0) {
84        return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
85    }
86    return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
87}
88
89bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
90    return HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
91            REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false);
92}
93
94bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
95        const EntryCounts &entryCounts, const int extendedRegionSize,
96        BufferWithExtendableBuffer *const outBuffer) const {
97    int writingPos = 0;
98    DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(mAttributeMap);
99    fillInHeader(updatesLastDecayedTime, entryCounts, extendedRegionSize, &attributeMapToWrite);
100    if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion,
101            &writingPos)) {
102        return false;
103    }
104    if (!HeaderReadWriteUtils::writeDictionaryFlags(outBuffer, mDictionaryFlags,
105            &writingPos)) {
106        return false;
107    }
108    // Temporarily writes a dummy header size.
109    int headerSizeFieldPos = writingPos;
110    if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, 0 /* size */,
111            &writingPos)) {
112        return false;
113    }
114    if (!HeaderReadWriteUtils::writeHeaderAttributes(outBuffer, &attributeMapToWrite,
115            &writingPos)) {
116        return false;
117    }
118    // Writes the actual header size.
119    if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, writingPos,
120            &headerSizeFieldPos)) {
121        return false;
122    }
123    return true;
124}
125
126namespace {
127
128int getIndexFromNgramType(const NgramType ngramType) {
129    return static_cast<int>(ngramType);
130}
131
132} // namespace
133
134void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime,
135        const EntryCounts &entryCounts, const int extendedRegionSize,
136        DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const {
137    for (const auto ngramType : AllNgramTypes::ASCENDING) {
138        HeaderReadWriteUtils::setIntAttribute(outAttributeMap,
139                NGRAM_COUNT_KEYS[getIndexFromNgramType(ngramType)],
140                entryCounts.getNgramCount(ngramType));
141    }
142    HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY,
143            extendedRegionSize);
144    // Set the current time as the generation time.
145    HeaderReadWriteUtils::setIntAttribute(outAttributeMap, DATE_KEY,
146            TimeKeeper::peekCurrentTime());
147    HeaderReadWriteUtils::setCodePointVectorAttribute(outAttributeMap, LOCALE_KEY, mLocale);
148    if (updatesLastDecayedTime) {
149        // Set current time as the last updated time.
150        HeaderReadWriteUtils::setIntAttribute(outAttributeMap, LAST_DECAYED_TIME_KEY,
151                TimeKeeper::peekCurrentTime());
152    }
153}
154
155/* static */ DictionaryHeaderStructurePolicy::AttributeMap
156        HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) {
157    DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
158    HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
159    return attributeMap;
160}
161
162/* static */ const EntryCounts HeaderPolicy::readNgramCounts() const {
163    MutableEntryCounters entryCounters;
164    for (const auto ngramType : AllNgramTypes::ASCENDING) {
165        const int entryCount = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
166                NGRAM_COUNT_KEYS[getIndexFromNgramType(ngramType)], 0 /* defaultValue */);
167        entryCounters.setNgramCount(ngramType, entryCount);
168    }
169    return entryCounters.getEntryCounts();
170}
171
172/* static */ const EntryCounts HeaderPolicy::readMaxNgramCounts() const {
173    MutableEntryCounters entryCounters;
174    for (const auto ngramType : AllNgramTypes::ASCENDING) {
175        const int index = getIndexFromNgramType(ngramType);
176        const int maxEntryCount = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
177                MAX_NGRAM_COUNT_KEYS[index], DEFAULT_MAX_NGRAM_COUNTS[index]);
178        entryCounters.setNgramCount(ngramType, maxEntryCount);
179    }
180    return entryCounters.getEntryCounts();
181}
182
183} // namespace latinime
184