16bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi/* 26bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * Copyright (C) 2013, The Android Open Source Project 36bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * 46bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 56bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * you may not use this file except in compliance with the License. 66bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * You may obtain a copy of the License at 76bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * 86bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 96bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * 106bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 116bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 126bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 136bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * See the License for the specific language governing permissions and 146bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * limitations under the License. 156bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi */ 166bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 176bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi/* 186bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * !!!!! DO NOT EDIT THIS FILE !!!!! 196bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * 206bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi * This file was generated from 2188bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi * dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp 226bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi */ 236bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 2488bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h" 256bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 266bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi#include <cstring> 276bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi#include <queue> 286bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 2988bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/header/header_policy.h" 3088bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" 3188bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h" 3288bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/backward/v402/ver4_dict_buffers.h" 3388bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/backward/v402/ver4_dict_constants.h" 3488bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h" 3588bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h" 3688bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/structure/backward/v402/ver4_pt_node_array_reader.h" 3788bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/buffer_with_extendable_buffer.h" 3888bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/file_utils.h" 3988bc312ad34321fb3e81be2dc939a889d065f4a7Keisuke Kuroyanagi#include "dictionary/utils/forgetting_curve_utils.h" 406bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 416bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanaginamespace latinime { 426bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanaginamespace backward { 4307e14126318f7661f76fdce421d723d64e7ea8deKeisuke Kuroyanaginamespace v402 { 446bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 456bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagibool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath, 46e8750d970eed61b9239d8b2fa19648b8457696c1Keisuke Kuroyanagi const EntryCounts &entryCounts) const { 476bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); 486bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi BufferWithExtendableBuffer headerBuffer( 496bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); 506bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int extendedRegionSize = headerPolicy->getExtendedRegionSize() 516bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize(); 526bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */, 53e8750d970eed61b9239d8b2fa19648b8457696c1Keisuke Kuroyanagi entryCounts, extendedRegionSize, &headerBuffer)) { 546bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi AKLOGE("Cannot write header structure to buffer. " 556bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, " 5678212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi "extendedRegionSize: %d", false, entryCounts.getNgramCount(NgramType::Unigram), 5778212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi entryCounts.getNgramCount(NgramType::Bigram), extendedRegionSize); 586bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 596bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 606bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); 616bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi} 626bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 636bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagibool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, 646bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const char *const dictDirPath) { 656bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy(); 666bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( 676bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4DictBuffers::createVer4DictBuffers(headerPolicy, 686bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4DictConstants::MAX_DICTIONARY_SIZE)); 696bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi int unigramCount = 0; 706bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi int bigramCount = 0; 716bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) { 726bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 736bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 746bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi BufferWithExtendableBuffer headerBuffer( 756bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); 7678212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi MutableEntryCounters entryCounters; 7778212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi entryCounters.setNgramCount(NgramType::Unigram, unigramCount); 7878212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi entryCounters.setNgramCount(NgramType::Bigram, bigramCount); 796bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */, 8078212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi entryCounters.getEntryCounts(), 0 /* extendedRegionSize */, &headerBuffer)) { 816bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 826bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 836bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer); 846bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi} 856bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 866bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagibool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, 876bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite, 886bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi int *const outUnigramCount, int *const outBigramCount) { 896bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(), 906bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers->getProbabilityDictContent(), headerPolicy); 916bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer()); 926bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(), 936bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers->getTerminalPositionLookupTable(), headerPolicy); 946bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(), 956bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers->getTerminalPositionLookupTable()); 966bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(), 976bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, 986bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &shortcutPolicy); 996bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 1006bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader); 1016bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 1026bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi DynamicPtGcEventListeners 1036bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted 1046bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( 1056bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &ptNodeWriter); 1066bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( 1076bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { 1086bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1096bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1106bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted 1116bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi .getValidUnigramCount(); 11278212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi const int maxUnigramCount = headerPolicy->getMaxNgramCounts().getNgramCount(NgramType::Unigram); 1136bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) { 1146bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) { 1156bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount, 1166bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi maxUnigramCount); 1176bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1186bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1196bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1206bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 1216bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 1226bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability 1236bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi traversePolicyToUpdateBigramProbability(&ptNodeWriter); 1246bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( 1256bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &traversePolicyToUpdateBigramProbability)) { 1266bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1276bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1286bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount(); 12978212a6d3de2c1fdaa394c58a16cbdee3ad5d046Keisuke Kuroyanagi const int maxBigramCount = headerPolicy->getMaxNgramCounts().getNgramCount(NgramType::Bigram); 1306bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) { 1316bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!truncateBigrams(maxBigramCount)) { 1326bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount); 1336bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1346bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1356bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1366bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 1376bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi // Mapping from positions in mBuffer to positions in bufferToWrite. 1386bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap; 1396bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 1406bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(), 1416bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy, 1426bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &shortcutPolicy); 1436bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer 1446bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers, 1456bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap); 1466bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( 1476bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { 1486bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1496bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1506bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 1516bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi // Create policy instances for the GCed dictionary. 1526bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(), 1536bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi buffersToWrite->getProbabilityDictContent(), headerPolicy); 1546bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer()); 1556bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(), 1566bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi buffersToWrite->getTerminalPositionLookupTable(), headerPolicy); 1576bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(), 1586bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi buffersToWrite->getTerminalPositionLookupTable()); 1596bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(), 1606bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy, 1616bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &newShortcutPolicy); 1626bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi // Re-assign terminal IDs for valid terminal PtNodes. 1636bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi TerminalPositionLookupTable::TerminalIdMap terminalIdMap; 1646bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds( 1656bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &terminalIdMap)) { 1666bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1676bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1686bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi // Run GC for probability dict content. 1696bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap, 1706bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers->getProbabilityDictContent())) { 1716bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1726bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1736bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi // Run GC for bigram dict content. 1746bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap, 1756bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers->getBigramDictContent(), outBigramCount)) { 1766bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1776bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1786bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi // Run GC for shortcut dict content. 1796bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap, 1806bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers->getShortcutDictContent())) { 1816bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1826bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1836bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader); 1846bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 1856bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields 1866bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap); 1876bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( 1886bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &traversePolicyToUpdateAllPositionFields)) { 1896bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1906bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1916bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 1926bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds 1936bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap); 1946bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner( 1956bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) { 1966bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 1976bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 1986bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); 1996bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return true; 2006bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi} 2016bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 2026bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagibool Ver4PatriciaTrieWritingHelper::truncateUnigrams( 2036bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const Ver4PatriciaTrieNodeReader *const ptNodeReader, 2046bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) { 2056bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const TerminalPositionLookupTable *const terminalPosLookupTable = 2066bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers->getTerminalPositionLookupTable(); 2076bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int nextTerminalId = terminalPosLookupTable->getNextTerminalId(); 2086bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator> 2096bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi priorityQueue; 2106bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi for (int i = 0; i < nextTerminalId; ++i) { 2116bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i); 2126bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (terminalPos == NOT_A_DICT_POS) { 2136bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi continue; 2146bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2156bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const ProbabilityEntry probabilityEntry = 2166bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers->getProbabilityDictContent()->getProbabilityEntry(i); 2176bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int probability = probabilityEntry.hasHistoricalInfo() ? 2186bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi ForgettingCurveUtils::decodeProbability( 2196bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) : 2206bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi probabilityEntry.getProbability(); 2216bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi priorityQueue.push(DictProbability(terminalPos, probability, 222287e155e44b4e937f2a62d010805702bc813c43bKeisuke Kuroyanagi probabilityEntry.getHistoricalInfo()->getTimestamp())); 2236bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2246bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 2256bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi // Delete unigrams. 2266bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) { 2276bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int ptNodePos = priorityQueue.top().getDictPos(); 22807e14126318f7661f76fdce421d723d64e7ea8deKeisuke Kuroyanagi priorityQueue.pop(); 2296bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const PtNodeParams ptNodeParams = 2300fbca1ac2388db81a443c1705732130564c3f714Keisuke Kuroyanagi ptNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); 23107e14126318f7661f76fdce421d723d64e7ea8deKeisuke Kuroyanagi if (ptNodeParams.representsNonWordInfo()) { 23207e14126318f7661f76fdce421d723d64e7ea8deKeisuke Kuroyanagi continue; 23307e14126318f7661f76fdce421d723d64e7ea8deKeisuke Kuroyanagi } 2346bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) { 2356bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos); 2366bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 2376bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2386bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2396bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return true; 2406bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi} 2416bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 2426bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagibool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) { 2436bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const TerminalPositionLookupTable *const terminalPosLookupTable = 2446bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mBuffers->getTerminalPositionLookupTable(); 2456bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int nextTerminalId = terminalPosLookupTable->getNextTerminalId(); 2466bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator> 2476bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi priorityQueue; 2486bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent(); 2496bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi for (int i = 0; i < nextTerminalId; ++i) { 2506bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int bigramListPos = bigramDictContent->getBigramListHeadPos(i); 2516bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (bigramListPos == NOT_A_DICT_POS) { 2526bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi continue; 2536bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2546bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi bool hasNext = true; 2556bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi int readingPos = bigramListPos; 2566bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi while (hasNext) { 2576bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int entryPos = readingPos; 2586bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const BigramEntry bigramEntry = 2596bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 2606bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi hasNext = bigramEntry.hasNext(); 2616bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!bigramEntry.isValid()) { 2626bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi continue; 2636bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2646bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int probability = bigramEntry.hasHistoricalInfo() ? 2656bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi ForgettingCurveUtils::decodeProbability( 2666bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) : 2676bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi bigramEntry.getProbability(); 2686bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi priorityQueue.push(DictProbability(entryPos, probability, 269287e155e44b4e937f2a62d010805702bc813c43bKeisuke Kuroyanagi bigramEntry.getHistoricalInfo()->getTimestamp())); 2706bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2716bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2726bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 2736bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi // Delete bigrams. 2746bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi while (static_cast<int>(priorityQueue.size()) > maxBigramCount) { 2756bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const int entryPos = priorityQueue.top().getDictPos(); 2766bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos); 2776bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry(); 2786bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) { 2796bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos); 2806bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 2816bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2826bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi priorityQueue.pop(); 2836bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2846bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return true; 2856bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi} 2866bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 2876bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagibool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds 2886bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) { 2896bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!ptNodeParams->isTerminal()) { 2906bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return true; 2916bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2926bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi TerminalPositionLookupTable::TerminalIdMap::const_iterator it = 2936bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi mTerminalIdMap->find(ptNodeParams->getTerminalId()); 2946bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (it == mTerminalIdMap->end()) { 2956bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd", 2966bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi ptNodeParams->getTerminalId(), mTerminalIdMap->size()); 2976bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return false; 2986bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 2996bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) { 3006bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second); 3016bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi } 3026bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams); 3036bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi} 3046bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi 30507e14126318f7661f76fdce421d723d64e7ea8deKeisuke Kuroyanagi} // namespace v402 3066bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi} // namespace backward 3076bf268132d60061fd26bd8cba63a12b56b22056eKeisuke Kuroyanagi} // namespace latinime 308