1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef LATINIME_JNI_DATA_UTILS_H
18#define LATINIME_JNI_DATA_UTILS_H
19
20#include <vector>
21
22#include "defines.h"
23#include "dictionary/header/header_read_write_utils.h"
24#include "dictionary/interface/dictionary_header_structure_policy.h"
25#include "dictionary/property/ngram_context.h"
26#include "dictionary/property/word_property.h"
27#include "jni.h"
28#include "utils/char_utils.h"
29
30namespace latinime {
31
32class JniDataUtils {
33 public:
34    static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) {
35        if (!array) {
36            outVector->clear();
37            return;
38        }
39        const jsize arrayLength = env->GetArrayLength(array);
40        outVector->resize(arrayLength);
41        env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data());
42    }
43
44    static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env,
45            jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) {
46        DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
47        const int keyCount = env->GetArrayLength(attributeKeyStringArray);
48        for (int i = 0; i < keyCount; i++) {
49            jstring keyString = static_cast<jstring>(
50                    env->GetObjectArrayElement(attributeKeyStringArray, i));
51            const jsize keyUtf8Length = env->GetStringUTFLength(keyString);
52            char keyChars[keyUtf8Length + 1];
53            env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars);
54            env->DeleteLocalRef(keyString);
55            keyChars[keyUtf8Length] = '\0';
56            DictionaryHeaderStructurePolicy::AttributeMap::key_type key;
57            HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key);
58
59            jstring valueString = static_cast<jstring>(
60                    env->GetObjectArrayElement(attributeValueStringArray, i));
61            const jsize valueUtf8Length = env->GetStringUTFLength(valueString);
62            char valueChars[valueUtf8Length + 1];
63            env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars);
64            env->DeleteLocalRef(valueString);
65            valueChars[valueUtf8Length] = '\0';
66            DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value;
67            HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value);
68            attributeMap[key] = value;
69        }
70        return attributeMap;
71    }
72
73    static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start,
74            const int maxLength, const int *const codePoints, const int codePointCount,
75            const bool needsNullTermination) {
76        const int codePointBufSize = std::min(maxLength, codePointCount);
77        int outputCodePonts[codePointBufSize];
78        int outputCodePointCount = 0;
79        for (int i = 0; i < codePointBufSize; ++i) {
80            const int codePoint = codePoints[i];
81            int codePointToOutput = codePoint;
82            if (!CharUtils::isInUnicodeSpace(codePoint)) {
83                if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) {
84                    // Just skip Beginning-of-Sentence marker.
85                    continue;
86                }
87                codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
88            } else if (codePoint >= 0x01 && codePoint <= 0x1F) {
89                // Control code.
90                codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER;
91            }
92            outputCodePonts[outputCodePointCount++] = codePointToOutput;
93        }
94        env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount,
95                outputCodePonts);
96        if (needsNullTermination && outputCodePointCount < maxLength) {
97            env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount,
98                    1 /* len */, &CODE_POINT_NULL);
99        }
100    }
101
102    static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
103            jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
104        int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
105        int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
106        bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
107        for (size_t i = 0; i < prevWordCount; ++i) {
108            prevWordCodePointCount[i] = 0;
109            isBeginningOfSentence[i] = false;
110            jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i);
111            if (!prevWord) {
112                continue;
113            }
114            jsize prevWordLength = env->GetArrayLength(prevWord);
115            if (prevWordLength > MAX_WORD_LENGTH) {
116                continue;
117            }
118            env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]);
119            env->DeleteLocalRef(prevWord);
120            prevWordCodePointCount[i] = prevWordLength;
121            jboolean isBeginningOfSentenceBoolean = JNI_FALSE;
122            env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */,
123                    &isBeginningOfSentenceBoolean);
124            isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
125        }
126        return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
127                prevWordCount);
128    }
129
130    static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index,
131            const jboolean value) {
132        env->SetBooleanArrayRegion(array, index, 1 /* len */, &value);
133    }
134
135    static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) {
136        env->SetIntArrayRegion(array, index, 1 /* len */, &value);
137    }
138
139    static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index,
140            const float value) {
141        env->SetFloatArrayRegion(array, index, 1 /* len */, &value);
142    }
143
    // Exports the contents of wordProperty to the given Java-side output objects.
    //
    // Only the declaration lives in this header; the definition is elsewhere and is not visible
    // here. The parameter names suggest that each "out*" argument receives one facet of the word
    // property (code points, flags, probability info, n-gram data, shortcut data).
    // NOTE(review): the expected Java types behind the plain jobject parameters are not visible
    // from this declaration; confirm against the definition and its callers.
    static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty,
            jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo,
            jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray,
            jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets,
            jobject outShortcutProbabilities);
149
150 private:
    // Every member function visible in this class is static.
    // NOTE(review): this macro is defined outside this header and presumably suppresses the
    // implicitly generated constructors so the class cannot be instantiated; confirm.
    DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils);

    // Substitute that outputCodePoints() writes in place of code points outside the Unicode space
    // and of control codes in [0x01, 0x1F]. Only declared here; the value is defined elsewhere.
    static const int CODE_POINT_REPLACEMENT_CHARACTER;
    // Terminator that outputCodePoints() appends when null termination is requested and there is
    // room for it. Only declared here; the value is defined elsewhere.
    static const int CODE_POINT_NULL;
155};
156} // namespace latinime
157#endif // LATINIME_JNI_DATA_UTILS_H
158