bigram_dict_content.h revision 804f7450fc94ad47c2a12ec9c1183a244f3f1a17
1/* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_BIGRAM_DICT_CONTENT_H 18#define LATINIME_BIGRAM_DICT_CONTENT_H 19 20#include "defines.h" 21#include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h" 22#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" 23#include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" 24#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" 25 26namespace latinime { 27 28class BigramDictContent : public SparseTableDictContent { 29 public: 30 BigramDictContent(const char *const dictPath, const bool hasHistoricalInfo, 31 const bool isUpdatable) 32 : SparseTableDictContent(dictPath, 33 Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, 34 Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, 35 Ver4DictConstants::BIGRAM_FILE_EXTENSION, isUpdatable, 36 Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 37 Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), 38 mHasHistoricalInfo(hasHistoricalInfo) {} 39 40 BigramDictContent(const bool hasHistoricalInfo) 41 : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 42 Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), 43 mHasHistoricalInfo(hasHistoricalInfo) {} 44 45 int getContentTailPos() const { 46 return getContentBuffer()->getTailPosition(); 47 } 48 49 const BigramEntry getBigramEntry(const int bigramEntryPos) const { 50 int readingPos = bigramEntryPos; 51 return getBigramEntryAndAdvancePosition(&readingPos); 52 } 53 54 const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const; 55 56 // Returns head position of bigram list for a PtNode specified by terminalId. 57 int getBigramListHeadPos(const int terminalId) const { 58 const SparseTable *const addressLookupTable = getAddressLookupTable(); 59 if (!addressLookupTable->contains(terminalId)) { 60 return NOT_A_DICT_POS; 61 } 62 return addressLookupTable->get(terminalId); 63 } 64 65 bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) { 66 int writingPos = getContentBuffer()->getTailPosition(); 67 return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); 68 } 69 70 bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) { 71 int writingPos = entryWritingPos; 72 return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); 73 } 74 75 bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite, 76 int *const entryWritingPos); 77 78 bool writeTerminator(const int writingPos) { 79 // Terminator is a link to the invalid position. 80 return writeLink(INVALID_LINKED_ENTRY_POS, writingPos); 81 } 82 83 bool writeLink(const int linkedPos, const int writingPos); 84 85 bool createNewBigramList(const int terminalId) { 86 const int bigramListPos = getContentBuffer()->getTailPosition(); 87 return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos); 88 } 89 90 bool flushToFile(const char *const dictPath) const { 91 return flush(dictPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, 92 Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, 93 Ver4DictConstants::BIGRAM_FILE_EXTENSION); 94 } 95 96 bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 97 const BigramDictContent *const originalBigramDictContent, 98 int *const outBigramEntryCount); 99 100 int getBigramEntrySize() const { 101 if (mHasHistoricalInfo) { 102 return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE 103 + Ver4DictConstants::TIME_STAMP_FIELD_SIZE 104 + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE 105 + Ver4DictConstants::WORD_COUNT_FIELD_SIZE 106 + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; 107 } else { 108 return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE 109 + Ver4DictConstants::PROBABILITY_SIZE 110 + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; 111 } 112 } 113 114 private: 115 DISALLOW_COPY_AND_ASSIGN(BigramDictContent); 116 117 static const int INVALID_LINKED_ENTRY_POS; 118 119 bool writeBigramEntryAttributesAndAdvancePosition( 120 const bool isLink, const int probability, const int targetTerminalId, 121 const int timestamp, const int level, const int count, int *const entryWritingPos); 122 123 bool runGCBigramList(const int bigramListPos, 124 const BigramDictContent *const sourceBigramDictContent, const int toPos, 125 const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 126 int *const outEntryCount); 127 128 bool mHasHistoricalInfo; 129}; 130} // namespace latinime 131#endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ 132