1b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi/* 2b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * Copyright (C) 2014 The Android Open Source Project 3b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * 4b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * you may not use this file except in compliance with the License. 6b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * You may obtain a copy of the License at 7b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * 8b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * 10b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * See the License for the specific language governing permissions and 14b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi * limitations under the License. 15b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi */ 16b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi 1772e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi#ifndef LATINIME_NGRAM_CONTEXT_H 1872e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi#define LATINIME_NGRAM_CONTEXT_H 19b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi 20c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi#include <array> 21c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi 22b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi#include "defines.h" 236ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi#include "utils/int_array_view.h" 24b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi 25b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanaginamespace latinime { 26b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi 27f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagiclass DictionaryStructureWithBufferPolicy; 28f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi 2972e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagiclass NgramContext { 30b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi public: 31b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi // No prev word information. 32f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi NgramContext(); 33f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi // Copy constructor to use this class with std::vector and use this class as a return value. 34f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi NgramContext(const NgramContext &ngramContext); 3505b1e0d42f9f103516103d4d33e61862c0851e9dKeisuke Kuroyanagi // Construct from previous words. 3672e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH], 3705b1e0d42f9f103516103d4d33e61862c0851e9dKeisuke Kuroyanagi const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence, 38f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi const size_t prevWordCount); 3905b1e0d42f9f103516103d4d33e61862c0851e9dKeisuke Kuroyanagi // Construct from a previous word. 4072e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount, 41f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi const bool isBeginningOfSentence); 4245d1a936a7a318286c4404951db1bd825e25cc7cKeisuke Kuroyanagi 4329777e3a8a419c7c897637372c908566c6490e90Keisuke Kuroyanagi size_t getPrevWordCount() const { 4429777e3a8a419c7c897637372c908566c6490e90Keisuke Kuroyanagi return mPrevWordCount; 4529777e3a8a419c7c897637372c908566c6490e90Keisuke Kuroyanagi } 46f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi bool isValid() const; 479f8c9a0161924f515c5ff9617db2317cdc1d01e2Keisuke Kuroyanagi 48c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi template<size_t N> 49c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi const WordIdArrayView getPrevWordIds( 50c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, 51f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const { 52c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) { 53f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i], 54f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch); 5545d1a936a7a318286c4404951db1bd825e25cc7cKeisuke Kuroyanagi } 56c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount); 57b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi } 58b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi 599f8c9a0161924f515c5ff9617db2317cdc1d01e2Keisuke Kuroyanagi // n is 1-indexed. 60f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const; 6122931cd94155b5623b9fa52c0596a44aa89bf606Keisuke Kuroyanagi // n is 1-indexed. 62f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi bool isNthPrevWordBeginningOfSentence(const size_t n) const; 6322931cd94155b5623b9fa52c0596a44aa89bf606Keisuke Kuroyanagi 64b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi private: 6572e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi DISALLOW_ASSIGNMENT_OPERATOR(NgramContext); 66b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi 6789a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, 6845d1a936a7a318286c4404951db1bd825e25cc7cKeisuke Kuroyanagi const int *const wordCodePoints, const int wordCodePointCount, 69f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi const bool isBeginningOfSentence, const bool tryLowerCaseSearch); 70f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi void clear(); 71b94ec1437b624a45ad5c0fde2dd385116e5e1163Keisuke Kuroyanagi 72c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi const size_t mPrevWordCount; 7305b1e0d42f9f103516103d4d33e61862c0851e9dKeisuke Kuroyanagi int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; 74b94ec1437b624a45ad5c0fde2dd385116e5e1163Keisuke Kuroyanagi int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 75b94ec1437b624a45ad5c0fde2dd385116e5e1163Keisuke Kuroyanagi bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 76b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi}; 77b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi} // namespace latinime 7872e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi#endif // LATINIME_NGRAM_CONTEXT_H 79