1e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi/* 2e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * Copyright (C) 2014 The Android Open Source Project 3e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * 4e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License"); 5e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * you may not use this file except in compliance with the License. 6e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * You may obtain a copy of the License at 7e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * 8e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * http://www.apache.org/licenses/LICENSE-2.0 9e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * 10e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software 11e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS, 12e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * See the License for the specific language governing permissions and 14e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi * limitations under the License. 15e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi */ 16e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi 17e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi#include "utils/jni_data_utils.h" 18e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi 19ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi#include "utils/int_array_view.h" 20ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi 21e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanaginamespace latinime { 22e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi 23e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagiconst int JniDataUtils::CODE_POINT_REPLACEMENT_CHARACTER = 0xFFFD; 24e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagiconst int JniDataUtils::CODE_POINT_NULL = 0; 25e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi 26ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi/* static */ void JniDataUtils::outputWordProperty(JNIEnv *const env, 27ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const WordProperty &wordProperty, jintArray outCodePoints, jbooleanArray outFlags, 28ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, 29ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, 30ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jobject outNgramProbabilities, jobject outShortcutTargets, 31ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jobject outShortcutProbabilities) { 32ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const CodePointArrayView codePoints = wordProperty.getCodePoints(); 33ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, 34ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi MAX_WORD_LENGTH /* maxLength */, codePoints.data(), codePoints.size(), 35ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi false /* needsNullTermination */); 36ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const UnigramProperty &unigramProperty = wordProperty.getUnigramProperty(); 37ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const std::vector<NgramProperty> &ngrams = wordProperty.getNgramProperties(); 38ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jboolean flags[] = {unigramProperty.isNotAWord(), unigramProperty.isPossiblyOffensive(), 39ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi !ngrams.empty(), unigramProperty.hasShortcuts(), 40ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi unigramProperty.representsBeginningOfSentence()}; 41ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->SetBooleanArrayRegion(outFlags, 0 /* start */, NELEMS(flags), flags); 42ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const HistoricalInfo &historicalInfo = unigramProperty.getHistoricalInfo(); 43ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi int probabilityInfo[] = {unigramProperty.getProbability(), historicalInfo.getTimestamp(), 44ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi historicalInfo.getLevel(), historicalInfo.getCount()}; 45ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->SetIntArrayRegion(outProbabilityInfo, 0 /* start */, NELEMS(probabilityInfo), 46ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi probabilityInfo); 47ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi 48ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jclass integerClass = env->FindClass("java/lang/Integer"); 49ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jmethodID intToIntegerConstructorId = env->GetMethodID(integerClass, "<init>", "(I)V"); 50ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jclass arrayListClass = env->FindClass("java/util/ArrayList"); 51ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); 52ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi 53ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi // Output ngrams. 54ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jclass intArrayClass = env->FindClass("[I"); 55ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi for (const auto &ngramProperty : ngrams) { 56ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const NgramContext *const ngramContext = ngramProperty.getNgramContext(); 57ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jobjectArray prevWordWordCodePointsArray = env->NewObjectArray( 58ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi ngramContext->getPrevWordCount(), intArrayClass, nullptr); 59ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jbooleanArray prevWordIsBeginningOfSentenceArray = 60ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->NewBooleanArray(ngramContext->getPrevWordCount()); 61ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi for (size_t i = 0; i < ngramContext->getPrevWordCount(); ++i) { 62ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const CodePointArrayView codePoints = ngramContext->getNthPrevWordCodePoints(i + 1); 63ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jintArray prevWordCodePoints = env->NewIntArray(codePoints.size()); 64ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi JniDataUtils::outputCodePoints(env, prevWordCodePoints, 0 /* start */, 65ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi codePoints.size(), codePoints.data(), codePoints.size(), 66ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi false /* needsNullTermination */); 67ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->SetObjectArrayElement(prevWordWordCodePointsArray, i, prevWordCodePoints); 68ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->DeleteLocalRef(prevWordCodePoints); 69ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi JniDataUtils::putBooleanToArray(env, prevWordIsBeginningOfSentenceArray, i, 70ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi ngramContext->isNthPrevWordBeginningOfSentence(i + 1)); 71ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi } 72ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->CallBooleanMethod(outNgramPrevWordsArray, addMethodId, prevWordWordCodePointsArray); 73ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->CallBooleanMethod(outNgramPrevWordIsBeginningOfSentenceArray, addMethodId, 74ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi prevWordIsBeginningOfSentenceArray); 75ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->DeleteLocalRef(prevWordWordCodePointsArray); 76ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->DeleteLocalRef(prevWordIsBeginningOfSentenceArray); 77ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi 78ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const std::vector<int> *const targetWordCodePoints = ngramProperty.getTargetCodePoints(); 79ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jintArray targetWordCodePointArray = env->NewIntArray(targetWordCodePoints->size()); 80ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi JniDataUtils::outputCodePoints(env, targetWordCodePointArray, 0 /* start */, 81ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi targetWordCodePoints->size(), targetWordCodePoints->data(), 82ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi targetWordCodePoints->size(), false /* needsNullTermination */); 83ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->CallBooleanMethod(outNgramTargets, addMethodId, targetWordCodePointArray); 84ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->DeleteLocalRef(targetWordCodePointArray); 85ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi 86ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const HistoricalInfo &ngramHistoricalInfo = ngramProperty.getHistoricalInfo(); 87ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi int bigramProbabilityInfo[] = {ngramProperty.getProbability(), 88ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi ngramHistoricalInfo.getTimestamp(), ngramHistoricalInfo.getLevel(), 89ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi ngramHistoricalInfo.getCount()}; 90ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jintArray bigramProbabilityInfoArray = env->NewIntArray(NELEMS(bigramProbabilityInfo)); 91ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->SetIntArrayRegion(bigramProbabilityInfoArray, 0 /* start */, 92ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi NELEMS(bigramProbabilityInfo), bigramProbabilityInfo); 93ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->CallBooleanMethod(outNgramProbabilities, addMethodId, bigramProbabilityInfoArray); 94ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->DeleteLocalRef(bigramProbabilityInfoArray); 95ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi } 96ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi 97ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi // Output shortcuts. 98ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi for (const auto &shortcut : unigramProperty.getShortcuts()) { 99ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi const std::vector<int> *const targetCodePoints = shortcut.getTargetCodePoints(); 100ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jintArray shortcutTargetCodePointArray = env->NewIntArray(targetCodePoints->size()); 101ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi JniDataUtils::outputCodePoints(env, shortcutTargetCodePointArray, 0 /* start */, 102ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi targetCodePoints->size(), targetCodePoints->data(), targetCodePoints->size(), 103ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi false /* needsNullTermination */); 104ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->CallBooleanMethod(outShortcutTargets, addMethodId, shortcutTargetCodePointArray); 105ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->DeleteLocalRef(shortcutTargetCodePointArray); 106ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi jobject integerProbability = env->NewObject(integerClass, intToIntegerConstructorId, 107ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi shortcut.getProbability()); 108ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->CallBooleanMethod(outShortcutProbabilities, addMethodId, integerProbability); 109ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->DeleteLocalRef(integerProbability); 110ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi } 111ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->DeleteLocalRef(integerClass); 112ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi env->DeleteLocalRef(arrayListClass); 113ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi} 114ad546afbaa1eea4b291ed021f86fab7652b5d205Keisuke Kuroyanagi 115e9121a68a67b8723477668130a16d4c72d98f6feKeisuke Kuroyanagi} // namespace latinime 116