bigram_dict_content.h revision 2ac934296c0571ea252f3fb5a23fba29eb89c666
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
18#define LATINIME_BIGRAM_DICT_CONTENT_H
19
20#include <cstdint>
21#include <cstdio>
22
23#include "defines.h"
24#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
25#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
26#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
27#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
28
29namespace latinime {
30
31class BigramDictContent : public SparseTableDictContent {
32 public:
33    BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo,
34            const bool isUpdatable)
35            : SparseTableDictContent(buffers, bufferSizes, isUpdatable,
36                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
37                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
38              mHasHistoricalInfo(hasHistoricalInfo) {}
39
40    BigramDictContent(const bool hasHistoricalInfo)
41            : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
42                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
43              mHasHistoricalInfo(hasHistoricalInfo) {}
44
45    int getContentTailPos() const {
46        return getContentBuffer()->getTailPosition();
47    }
48
49    const BigramEntry getBigramEntry(const int bigramEntryPos) const {
50        int readingPos = bigramEntryPos;
51        return getBigramEntryAndAdvancePosition(&readingPos);
52    }
53
54    const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
55
56    // Returns head position of bigram list for a PtNode specified by terminalId.
57    int getBigramListHeadPos(const int terminalId) const {
58        const SparseTable *const addressLookupTable = getAddressLookupTable();
59        if (!addressLookupTable->contains(terminalId)) {
60            return NOT_A_DICT_POS;
61        }
62        return addressLookupTable->get(terminalId);
63    }
64
65    bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
66        int writingPos = getContentBuffer()->getTailPosition();
67        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
68    }
69
70    bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
71        int writingPos = entryWritingPos;
72        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
73    }
74
75    bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
76            int *const entryWritingPos);
77
78    bool writeTerminator(const int writingPos) {
79        // Terminator is a link to the invalid position.
80        return writeLink(INVALID_LINKED_ENTRY_POS, writingPos);
81    }
82
83    bool writeLink(const int linkedPos, const int writingPos);
84
85    bool createNewBigramList(const int terminalId) {
86        const int bigramListPos = getContentBuffer()->getTailPosition();
87        return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
88    }
89
90    bool flushToFile(FILE *const file) const {
91        return flush(file);
92    }
93
94    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
95            const BigramDictContent *const originalBigramDictContent,
96            int *const outBigramEntryCount);
97
98    int getBigramEntrySize() const {
99        if (mHasHistoricalInfo) {
100            return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
101                    + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
102                    + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
103                    + Ver4DictConstants::WORD_COUNT_FIELD_SIZE
104                    + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
105        } else {
106            return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
107                    + Ver4DictConstants::PROBABILITY_SIZE
108                    + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
109        }
110    }
111
112 private:
113    DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
114
115    static const int INVALID_LINKED_ENTRY_POS;
116
117    bool writeBigramEntryAttributesAndAdvancePosition(
118            const bool isLink, const int probability, const int targetTerminalId,
119            const int timestamp, const int level, const int count, int *const entryWritingPos);
120
121    bool runGCBigramList(const int bigramListPos,
122            const BigramDictContent *const sourceBigramDictContent, const int toPos,
123            const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
124            int *const outEntryCount);
125
126    bool mHasHistoricalInfo;
127};
128} // namespace latinime
129#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
130