dictionary/property/ngram_context.h

/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_NGRAM_CONTEXT_H
#define LATINIME_NGRAM_CONTEXT_H

#include <array>

#include "defines.h"
#include "utils/int_array_view.h"

namespace latinime {

class DictionaryStructureWithBufferPolicy;

class NgramContext {
 public:
    // No prev word information.
    NgramContext();
    // Copy constructor to use this class with std::vector and use this class as a return value.
    NgramContext(const NgramContext &ngramContext);
    // Construct from previous words.
    NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
            const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
            const size_t prevWordCount);
    // Construct from a previous word.
    NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
            const bool isBeginningOfSentence);

    size_t getPrevWordCount() const {
        return mPrevWordCount;
    }
    bool isValid() const;

    template<size_t N>
    const WordIdArrayView getPrevWordIds(
            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
            WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
        for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) {
            prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i],
                    mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch);
        }
        return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount);
    }

    // n is 1-indexed.
    const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;
    // n is 1-indexed.
    bool isNthPrevWordBeginningOfSentence(const size_t n) const;

 private:
    DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);

    static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
            const int *const wordCodePoints, const int wordCodePointCount,
            const bool isBeginningOfSentence, const bool tryLowerCaseSearch);
    void clear();

    const size_t mPrevWordCount;
    int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
    int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
};
} // namespace latinime
#endif // LATINIME_NGRAM_CONTEXT_H