/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi
1772e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi#ifndef LATINIME_NGRAM_CONTEXT_H
1872e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi#define LATINIME_NGRAM_CONTEXT_H
19b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi
20c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi#include <array>
21c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi
22b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi#include "defines.h"
236ae4d79d81aa7aea5529d95bb3eb960a273ef411Keisuke Kuroyanagi#include "utils/int_array_view.h"
24b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi
25b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanaginamespace latinime {
26b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi
27f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagiclass DictionaryStructureWithBufferPolicy;
28f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi
2972e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagiclass NgramContext {
30b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi public:
31b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi    // No prev word information.
32f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi    NgramContext();
33f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi    // Copy constructor to use this class with std::vector and use this class as a return value.
34f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi    NgramContext(const NgramContext &ngramContext);
3505b1e0d42f9f103516103d4d33e61862c0851e9dKeisuke Kuroyanagi    // Construct from previous words.
3672e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
3705b1e0d42f9f103516103d4d33e61862c0851e9dKeisuke Kuroyanagi            const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
38f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi            const size_t prevWordCount);
3905b1e0d42f9f103516103d4d33e61862c0851e9dKeisuke Kuroyanagi    // Construct from a previous word.
4072e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
41f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi            const bool isBeginningOfSentence);
4245d1a936a7a318286c4404951db1bd825e25cc7cKeisuke Kuroyanagi
4329777e3a8a419c7c897637372c908566c6490e90Keisuke Kuroyanagi    size_t getPrevWordCount() const {
4429777e3a8a419c7c897637372c908566c6490e90Keisuke Kuroyanagi        return mPrevWordCount;
4529777e3a8a419c7c897637372c908566c6490e90Keisuke Kuroyanagi    }
46f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi    bool isValid() const;
479f8c9a0161924f515c5ff9617db2317cdc1d01e2Keisuke Kuroyanagi
48c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi    template<size_t N>
49c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi    const WordIdArrayView getPrevWordIds(
50c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
51f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi            WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
52c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi        for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) {
53f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi            prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i],
54f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi                    mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch);
5545d1a936a7a318286c4404951db1bd825e25cc7cKeisuke Kuroyanagi        }
56c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi        return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount);
57b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi    }
58b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi
599f8c9a0161924f515c5ff9617db2317cdc1d01e2Keisuke Kuroyanagi    // n is 1-indexed.
60f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi    const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;
6122931cd94155b5623b9fa52c0596a44aa89bf606Keisuke Kuroyanagi    // n is 1-indexed.
62f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi    bool isNthPrevWordBeginningOfSentence(const size_t n) const;
6322931cd94155b5623b9fa52c0596a44aa89bf606Keisuke Kuroyanagi
64b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi private:
6572e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi    DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
66b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi
6789a003b12b5e2408b908a8afed498b0425e2c1c8Keisuke Kuroyanagi    static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
6845d1a936a7a318286c4404951db1bd825e25cc7cKeisuke Kuroyanagi            const int *const wordCodePoints, const int wordCodePointCount,
69f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi            const bool isBeginningOfSentence, const bool tryLowerCaseSearch);
70f87bb77a9183d126847d5925c2b03bec45fabd6dKeisuke Kuroyanagi    void clear();
71b94ec1437b624a45ad5c0fde2dd385116e5e1163Keisuke Kuroyanagi
72c43b6664faedff7f97df24fbc07e1c1c6c4a9106Keisuke Kuroyanagi    const size_t mPrevWordCount;
7305b1e0d42f9f103516103d4d33e61862c0851e9dKeisuke Kuroyanagi    int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
74b94ec1437b624a45ad5c0fde2dd385116e5e1163Keisuke Kuroyanagi    int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
75b94ec1437b624a45ad5c0fde2dd385116e5e1163Keisuke Kuroyanagi    bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
76b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi};
77b87fffb8be3dc6a79e49890a7700704d7fee616bKeisuke Kuroyanagi} // namespace latinime
7872e2383d11cf09735b378dcedd20c9fc43da1f12Keisuke Kuroyanagi#endif // LATINIME_NGRAM_CONTEXT_H
79