bigram_dict_content.cpp revision 804f7450fc94ad47c2a12ec9c1183a244f3f1a17
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
18
19#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
20
21namespace latinime {
22
// Sentinel link position. When a link entry's decoded target field equals this value,
// getBigramEntryAndAdvancePosition() treats the entry as the terminator of a bigram list
// instead of following it.
const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID;
24
25const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
26        int *const bigramEntryPos) const {
27    const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
28    const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize();
29    if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) {
30        AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
31                "bufSize: %d", *bigramEntryPos, bigramEntryTailPos,
32                        bigramListBuffer->getTailPosition());
33        ASSERT(false);
34        return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
35                Ver4DictConstants::NOT_A_TERMINAL_ID);
36    }
37    const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
38            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
39    const bool isLink = (bigramFlags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0;
40    int probability = NOT_A_PROBABILITY;
41    int timestamp = NOT_A_TIMESTAMP;
42    int level = 0;
43    int count = 0;
44    if (mHasHistoricalInfo) {
45        timestamp = bigramListBuffer->readUintAndAdvancePosition(
46                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
47        level = bigramListBuffer->readUintAndAdvancePosition(
48                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
49        count = bigramListBuffer->readUintAndAdvancePosition(
50                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
51    } else {
52        probability = bigramListBuffer->readUintAndAdvancePosition(
53                Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
54    }
55    const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
56            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
57    const int targetTerminalId =
58            (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
59                    Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
60    if (isLink) {
61        const int linkedEntryPos = targetTerminalId;
62        if (linkedEntryPos == INVALID_LINKED_ENTRY_POS) {
63            // Bigram list terminator is found.
64            return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
65                    Ver4DictConstants::NOT_A_TERMINAL_ID);
66        }
67        *bigramEntryPos = linkedEntryPos;
68        return getBigramEntryAndAdvancePosition(bigramEntryPos);
69    }
70    // hasNext is always true because we should continue to read the next entry until the terminator
71    // is found.
72    if (mHasHistoricalInfo) {
73        const HistoricalInfo historicalInfo(timestamp, level, count);
74        return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId);
75    } else {
76        return BigramEntry(true /* hasNext */, probability, targetTerminalId);
77    }
78}
79
80bool BigramDictContent::writeBigramEntryAndAdvancePosition(
81        const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
82    return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */,
83            bigramEntryToWrite->getProbability(), bigramEntryToWrite->getTargetTerminalId(),
84            bigramEntryToWrite->getHistoricalInfo()->getTimeStamp(),
85            bigramEntryToWrite->getHistoricalInfo()->getLevel(),
86            bigramEntryToWrite->getHistoricalInfo()->getCount(),
87            entryWritingPos);
88}
89
90bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition(
91        const bool isLink, const int probability, const int targetTerminalId,
92        const int timestamp, const int level, const int count, int *const entryWritingPos) {
93    BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
94    const int bigramFlags = isLink ? Ver4DictConstants::BIGRAM_IS_LINK_MASK : 0;
95    if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
96            Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
97        AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
98        return false;
99    }
100    if (mHasHistoricalInfo) {
101        if (!bigramListBuffer->writeUintAndAdvancePosition(timestamp,
102                Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
103            AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
104                    timestamp);
105            return false;
106        }
107        if (!bigramListBuffer->writeUintAndAdvancePosition(level,
108                Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
109            AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
110                    level);
111            return false;
112        }
113        if (!bigramListBuffer->writeUintAndAdvancePosition(count,
114                Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
115            AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
116                    count);
117            return false;
118        }
119    } else {
120        if (!bigramListBuffer->writeUintAndAdvancePosition(probability,
121                Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
122            AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
123                    probability);
124            return false;
125        }
126    }
127    const int targetTerminalIdToWrite = (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
128            Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
129    if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
130            Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
131        AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
132                *entryWritingPos, targetTerminalId);
133        return false;
134    }
135    return true;
136}
137
138bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) {
139    const int targetTerminalId = linkedEntryPos;
140    int pos = writingPos;
141    return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */,
142            NOT_A_PROBABILITY /* probability */, targetTerminalId, NOT_A_TIMESTAMP, 0 /* level */,
143            0 /* count */, &pos);
144}
145
146bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
147        const BigramDictContent *const originalBigramDictContent,
148        int *const outBigramEntryCount) {
149    for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
150            it != terminalIdMap->end(); ++it) {
151        const int originalBigramListPos =
152                originalBigramDictContent->getBigramListHeadPos(it->first);
153        if (originalBigramListPos == NOT_A_DICT_POS) {
154            // This terminal does not have a bigram list.
155            continue;
156        }
157        const int bigramListPos = getContentBuffer()->getTailPosition();
158        int bigramEntryCount = 0;
159        // Copy bigram list with GC from original content.
160        if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
161                terminalIdMap, &bigramEntryCount)) {
162            AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
163                    originalBigramListPos, bigramListPos);
164            return false;
165        }
166        if (bigramEntryCount == 0) {
167            // All bigram entries are useless. This terminal does not have a bigram list.
168            continue;
169        }
170        *outBigramEntryCount += bigramEntryCount;
171        // Set bigram list position to the lookup table.
172        if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
173            AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
174                    it->second, bigramListPos);
175            return false;
176        }
177    }
178    return true;
179}
180
181// Returns whether GC for the bigram list was succeeded or not.
182bool BigramDictContent::runGCBigramList(const int bigramListPos,
183        const BigramDictContent *const sourceBigramDictContent, const int toPos,
184        const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
185        int *const outEntryCount) {
186    bool hasNext = true;
187    int readingPos = bigramListPos;
188    int writingPos = toPos;
189    while (hasNext) {
190        const BigramEntry originalBigramEntry =
191                sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
192        hasNext = originalBigramEntry.hasNext();
193        if (!originalBigramEntry.isValid()) {
194            continue;
195        }
196        TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
197                terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
198        if (it == terminalIdMap->end()) {
199            // Target word has been removed.
200            continue;
201        }
202        const BigramEntry updatedBigramEntry =
203                originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
204        if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
205            AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
206            return false;
207        }
208        *outEntryCount += 1;
209    }
210    if (*outEntryCount > 0) {
211        if (!writeTerminator(writingPos)) {
212            AKLOGE("Cannot write terminator to run GC. pos: %d", writingPos);
213            return false;
214        }
215    }
216    return true;
217}
218
219} // namespace latinime
220