bigram_dict_content.h revision 2ac934296c0571ea252f3fb5a23fba29eb89c666
1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_BIGRAM_DICT_CONTENT_H 18#define LATINIME_BIGRAM_DICT_CONTENT_H 19 20#include <cstdint> 21#include <cstdio> 22 23#include "defines.h" 24#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h" 25#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" 26#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" 27#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" 28 29namespace latinime { 30 31class BigramDictContent : public SparseTableDictContent { 32 public: 33 BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo, 34 const bool isUpdatable) 35 : SparseTableDictContent(buffers, bufferSizes, isUpdatable, 36 Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 37 Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), 38 mHasHistoricalInfo(hasHistoricalInfo) {} 39 40 BigramDictContent(const bool hasHistoricalInfo) 41 : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 42 Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), 43 mHasHistoricalInfo(hasHistoricalInfo) {} 44 45 int getContentTailPos() const { 46 return getContentBuffer()->getTailPosition(); 47 } 48 49 const BigramEntry getBigramEntry(const int bigramEntryPos) const { 50 int readingPos = bigramEntryPos; 51 return getBigramEntryAndAdvancePosition(&readingPos); 52 } 53 54 const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const; 55 56 // Returns head position of bigram list for a PtNode specified by terminalId. 57 int getBigramListHeadPos(const int terminalId) const { 58 const SparseTable *const addressLookupTable = getAddressLookupTable(); 59 if (!addressLookupTable->contains(terminalId)) { 60 return NOT_A_DICT_POS; 61 } 62 return addressLookupTable->get(terminalId); 63 } 64 65 bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) { 66 int writingPos = getContentBuffer()->getTailPosition(); 67 return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); 68 } 69 70 bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) { 71 int writingPos = entryWritingPos; 72 return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); 73 } 74 75 bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite, 76 int *const entryWritingPos); 77 78 bool writeTerminator(const int writingPos) { 79 // Terminator is a link to the invalid position. 80 return writeLink(INVALID_LINKED_ENTRY_POS, writingPos); 81 } 82 83 bool writeLink(const int linkedPos, const int writingPos); 84 85 bool createNewBigramList(const int terminalId) { 86 const int bigramListPos = getContentBuffer()->getTailPosition(); 87 return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos); 88 } 89 90 bool flushToFile(FILE *const file) const { 91 return flush(file); 92 } 93 94 bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 95 const BigramDictContent *const originalBigramDictContent, 96 int *const outBigramEntryCount); 97 98 int getBigramEntrySize() const { 99 if (mHasHistoricalInfo) { 100 return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE 101 + Ver4DictConstants::TIME_STAMP_FIELD_SIZE 102 + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE 103 + Ver4DictConstants::WORD_COUNT_FIELD_SIZE 104 + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; 105 } else { 106 return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE 107 + Ver4DictConstants::PROBABILITY_SIZE 108 + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; 109 } 110 } 111 112 private: 113 DISALLOW_COPY_AND_ASSIGN(BigramDictContent); 114 115 static const int INVALID_LINKED_ENTRY_POS; 116 117 bool writeBigramEntryAttributesAndAdvancePosition( 118 const bool isLink, const int probability, const int targetTerminalId, 119 const int timestamp, const int level, const int count, int *const entryWritingPos); 120 121 bool runGCBigramList(const int bigramListPos, 122 const BigramDictContent *const sourceBigramDictContent, const int toPos, 123 const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 124 int *const outEntryCount); 125 126 bool mHasHistoricalInfo; 127}; 128} // namespace latinime 129#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ 130