1e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi/*
2e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * Copyright (C) 2014 The Android Open Source Project
3e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi *
4e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License");
5e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * you may not use this file except in compliance with the License.
6e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * You may obtain a copy of the License at
7e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi *
8e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi *      http://www.apache.org/licenses/LICENSE-2.0
9e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi *
10e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software
11e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS,
12e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * See the License for the specific language governing permissions and
14e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * limitations under the License.
15e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi */
16e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi
17e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi#include "utils/jni_data_utils.h"
18e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi
19ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi#include "utils/int_array_view.h"
20ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi
21e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanaginamespace latinime {
22e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi
23e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagiconst int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD;
24e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagiconst int JniDataUtils::CODE_POINT_NULL = 0;
25e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi
26ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi/* static */ void JniDataUtils::outputWordProperty(JNIEnv *const env,
27ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        const WordProperty &wordProperty, jintArray outCodePoints, jbooleanArray outFlags,
28ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jintArray outProbabilityInfo, jobject outNgramPrevWordsArray,
29ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets,
30ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jobject outNgramProbabilities, jobject outShortcutTargets,
31ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jobject outShortcutProbabilities) {
32ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    const CodePointArrayView codePoints = wordProperty.getCodePoints();
33ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
34ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            MAX_WORD_LENGTH /* maxLength */, codePoints.data(), codePoints.size(),
35ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            false /* needsNullTermination */);
36ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    const UnigramProperty &unigramProperty = wordProperty.getUnigramProperty();
37ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    const std::vector<NgramProperty> &ngrams = wordProperty.getNgramProperties();
38ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    jboolean flags[] = {unigramProperty.isNotAWord(), unigramProperty.isPossiblyOffensive(),
39ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            !ngrams.empty(), unigramProperty.hasShortcuts(),
40ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            unigramProperty.representsBeginningOfSentence()};
41ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags);
42ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    const HistoricalInfo &historicalInfo = unigramProperty.getHistoricalInfo();
43ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    int probabilityInfo[] = {unigramProperty.getProbability(), historicalInfo.getTimestamp(),
44ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            historicalInfo.getLevel(), historicalInfo.getCount()};
45ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo),
46ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            probabilityInfo);
47ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi
48ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    jclass integerClass = env->FindClass("java/lang/Integer");
49ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V");
50ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    jclass arrayListClass = env->FindClass("java/util/ArrayList");
51ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
52ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi
53ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    // Output ngrams.
54ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    jclass intArrayClass = env->FindClass("[I");
55ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    for (const auto &ngramProperty : ngrams) {
56ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        const NgramContext *const ngramContext = ngramProperty.getNgramContext();
57ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jobjectArray prevWordWordCodePointsArray = env->NewObjectArray(
58ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                ngramContext->getPrevWordCount(), intArrayClass, nullptr);
59ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jbooleanArray prevWordIsBeginningOfSentenceArray =
60ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                env->NewBooleanArray(ngramContext->getPrevWordCount());
61ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) {
62ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1);
63ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            jintArray prevWordCodePoints = env->NewIntArray(codePoints.size());
64ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */,
65ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                    codePoints.size(), codePoints.data(), codePoints.size(),
66ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                    false /* needsNullTermination */);
67ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints);
68ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            env->DeleteLocalRef(prevWordCodePoints);
69ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi            JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i,
70ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                    ngramContext->isNthPrevWordBeginningOfSentence(i + 1));
71ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        }
72ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray);
73ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId,
74ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                prevWordIsBeginningOfSentenceArray);
75ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->DeleteLocalRef(prevWordWordCodePointsArray);
76ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray);
77ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi
78ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints();
79ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size());
80ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */,
81ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                targetWordCodePoints->size(), targetWordCodePoints->data(),
82ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                targetWordCodePoints->size(), false /* needsNullTermination */);
83ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray);
84ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->DeleteLocalRef(targetWordCodePointArray);
85ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi
86ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo();
87ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        int bigramProbabilityInfo[] = {ngramProperty.getProbability(),
88ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(),
89ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                ngramHistoricalInfo.getCount()};
90ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo));
91ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */,
92ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                NELEMS(bigramProbabilityInfo), bigramProbabilityInfo);
93ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray);
94ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->DeleteLocalRef(bigramProbabilityInfoArray);
95ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    }
96ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi
97ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    // Output shortcuts.
98ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    for (const auto &shortcut : unigramProperty.getShortcuts()) {
99ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints();
100ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size());
101ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */,
102ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(),
103ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                false /* needsNullTermination */);
104ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray);
105ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->DeleteLocalRef(shortcutTargetCodePointArray);
106ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId,
107ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi                shortcut.getProbability());
108ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability);
109ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi        env->DeleteLocalRef(integerProbability);
110ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    }
111ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    env->DeleteLocalRef(integerClass);
112ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi    env->DeleteLocalRef(arrayListClass);
113ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi}
114ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi
115e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi} // namespace latinime
116