bigram_dict_content.h revision b22f95ec8a8325e65b95e7b0d57156854f7984f6
1/*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_BIGRAM_DICT_CONTENT_H
18#define LATINIME_BIGRAM_DICT_CONTENT_H
19
20#include <cstdint>
21#include <cstdio>
22
23#include "defines.h"
24#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
25#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
26#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
27#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
28
29namespace latinime {
30
31class BigramDictContent : public SparseTableDictContent {
32 public:
33    BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo)
34            : SparseTableDictContent(buffers, bufferSizes,
35                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
36                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
37              mHasHistoricalInfo(hasHistoricalInfo) {}
38
39    BigramDictContent(const bool hasHistoricalInfo)
40            : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
41                      Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
42              mHasHistoricalInfo(hasHistoricalInfo) {}
43
44    int getContentTailPos() const {
45        return getContentBuffer()->getTailPosition();
46    }
47
48    const BigramEntry getBigramEntry(const int bigramEntryPos) const {
49        int readingPos = bigramEntryPos;
50        return getBigramEntryAndAdvancePosition(&readingPos);
51    }
52
53    const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
54
55    // Returns head position of bigram list for a PtNode specified by terminalId.
56    int getBigramListHeadPos(const int terminalId) const {
57        const SparseTable *const addressLookupTable = getAddressLookupTable();
58        if (!addressLookupTable->contains(terminalId)) {
59            return NOT_A_DICT_POS;
60        }
61        return addressLookupTable->get(terminalId);
62    }
63
64    bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
65        int writingPos = getContentBuffer()->getTailPosition();
66        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
67    }
68
69    bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
70        int writingPos = entryWritingPos;
71        return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
72    }
73
74    bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
75            int *const entryWritingPos);
76
77    bool writeTerminator(const int writingPos) {
78        // Terminator is a link to the invalid position.
79        return writeLink(INVALID_LINKED_ENTRY_POS, writingPos);
80    }
81
82    bool writeLink(const int linkedPos, const int writingPos);
83
84    bool createNewBigramList(const int terminalId) {
85        const int bigramListPos = getContentBuffer()->getTailPosition();
86        return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
87    }
88
89    bool flushToFile(FILE *const file) const {
90        return flush(file);
91    }
92
93    bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
94            const BigramDictContent *const originalBigramDictContent,
95            int *const outBigramEntryCount);
96
97    int getBigramEntrySize() const {
98        if (mHasHistoricalInfo) {
99            return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
100                    + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
101                    + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
102                    + Ver4DictConstants::WORD_COUNT_FIELD_SIZE
103                    + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
104        } else {
105            return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
106                    + Ver4DictConstants::PROBABILITY_SIZE
107                    + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
108        }
109    }
110
111 private:
112    DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
113
114    static const int INVALID_LINKED_ENTRY_POS;
115
116    bool writeBigramEntryAttributesAndAdvancePosition(
117            const bool isLink, const int probability, const int targetTerminalId,
118            const int timestamp, const int level, const int count, int *const entryWritingPos);
119
120    bool runGCBigramList(const int bigramListPos,
121            const BigramDictContent *const sourceBigramDictContent, const int toPos,
122            const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
123            int *const outEntryCount);
124
125    bool mHasHistoricalInfo;
126};
127} // namespace latinime
128#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
129