1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_NGRAM_CONTEXT_H
18#define LATINIME_NGRAM_CONTEXT_H
19
20#include <array>
21
22#include "defines.h"
23#include "utils/int_array_view.h"
24
25namespace latinime {
26
27class DictionaryStructureWithBufferPolicy;
28
29class NgramContext {
30 public:
31    // No prev word information.
32    NgramContext();
33    // Copy constructor to use this class with std::vector and use this class as a return value.
34    NgramContext(const NgramContext &ngramContext);
35    // Construct from previous words.
36    NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
37            const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
38            const size_t prevWordCount);
39    // Construct from a previous word.
40    NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
41            const bool isBeginningOfSentence);
42
43    size_t getPrevWordCount() const {
44        return mPrevWordCount;
45    }
46    bool isValid() const;
47
48    template<size_t N>
49    const WordIdArrayView getPrevWordIds(
50            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
51            WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
52        for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) {
53            prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i],
54                    mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch);
55        }
56        return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount);
57    }
58
59    // n is 1-indexed.
60    const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;
61    // n is 1-indexed.
62    bool isNthPrevWordBeginningOfSentence(const size_t n) const;
63
64 private:
65    DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
66
67    static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
68            const int *const wordCodePoints, const int wordCodePointCount,
69            const bool isBeginningOfSentence, const bool tryLowerCaseSearch);
70    void clear();
71
72    const size_t mPrevWordCount;
73    int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
74    int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
75    bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
76};
77} // namespace latinime
78#endif // LATINIME_NGRAM_CONTEXT_H
79