jni_data_utils.h revision 16cc3992d7468ef781753df7b4227330e0834501
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef LATINIME_JNI_DATA_UTILS_H 18#define LATINIME_JNI_DATA_UTILS_H 19 20#include <vector> 21 22#include "defines.h" 23#include "jni.h" 24#include "suggest/core/session/ngram_context.h" 25#include "suggest/core/policy/dictionary_header_structure_policy.h" 26#include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" 27#include "utils/char_utils.h" 28 29namespace latinime { 30 31class JniDataUtils { 32 public: 33 static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) { 34 if (!array) { 35 outVector->clear(); 36 return; 37 } 38 const jsize arrayLength = env->GetArrayLength(array); 39 outVector->resize(arrayLength); 40 env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data()); 41 } 42 43 static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env, 44 jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) { 45 DictionaryHeaderStructurePolicy::AttributeMap attributeMap; 46 const int keyCount = env->GetArrayLength(attributeKeyStringArray); 47 for (int i = 0; i < keyCount; i++) { 48 jstring keyString = static_cast<jstring>( 49 env->GetObjectArrayElement(attributeKeyStringArray, i)); 50 const jsize keyUtf8Length = env->GetStringUTFLength(keyString); 51 char keyChars[keyUtf8Length + 1]; 52 env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); 53 env->DeleteLocalRef(keyString); 54 keyChars[keyUtf8Length] = '\0'; 55 DictionaryHeaderStructurePolicy::AttributeMap::key_type key; 56 HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); 57 58 jstring valueString = static_cast<jstring>( 59 env->GetObjectArrayElement(attributeValueStringArray, i)); 60 const jsize valueUtf8Length = env->GetStringUTFLength(valueString); 61 char valueChars[valueUtf8Length + 1]; 62 env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); 63 env->DeleteLocalRef(valueString); 64 valueChars[valueUtf8Length] = '\0'; 65 DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value; 66 HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); 67 attributeMap[key] = value; 68 } 69 return attributeMap; 70 } 71 72 static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start, 73 const int maxLength, const int *const codePoints, const int codePointCount, 74 const bool needsNullTermination) { 75 const int codePointBufSize = std::min(maxLength, codePointCount); 76 int outputCodePonts[codePointBufSize]; 77 int outputCodePointCount = 0; 78 for (int i = 0; i < codePointBufSize; ++i) { 79 const int codePoint = codePoints[i]; 80 int codePointToOutput = codePoint; 81 if (!CharUtils::isInUnicodeSpace(codePoint)) { 82 if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) { 83 // Just skip Beginning-of-Sentence marker. 84 continue; 85 } 86 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; 87 } else if (codePoint >= 0x01 && codePoint <= 0x1F) { 88 // Control code. 89 codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; 90 } 91 outputCodePonts[outputCodePointCount++] = codePointToOutput; 92 } 93 env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, 94 outputCodePonts); 95 if (needsNullTermination && outputCodePointCount < maxLength) { 96 env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount, 97 1 /* len */, &CODE_POINT_NULL); 98 } 99 } 100 101 static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays, 102 jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) { 103 int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; 104 int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 105 bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 106 for (size_t i = 0; i < prevWordCount; ++i) { 107 prevWordCodePointCount[i] = 0; 108 isBeginningOfSentence[i] = false; 109 jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i); 110 if (!prevWord) { 111 continue; 112 } 113 jsize prevWordLength = env->GetArrayLength(prevWord); 114 if (prevWordLength > MAX_WORD_LENGTH) { 115 continue; 116 } 117 env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]); 118 env->DeleteLocalRef(prevWord); 119 prevWordCodePointCount[i] = prevWordLength; 120 jboolean isBeginningOfSentenceBoolean = JNI_FALSE; 121 env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */, 122 &isBeginningOfSentenceBoolean); 123 isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE; 124 } 125 return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, 126 prevWordCount); 127 } 128 129 static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index, 130 const jboolean value) { 131 env->SetBooleanArrayRegion(array, index, 1 /* len */, &value); 132 } 133 134 static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) { 135 env->SetIntArrayRegion(array, index, 1 /* len */, &value); 136 } 137 138 static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index, 139 const float value) { 140 env->SetFloatArrayRegion(array, index, 1 /* len */, &value); 141 } 142 143 private: 144 DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils); 145 146 static const int CODE_POINT_REPLACEMENT_CHARACTER; 147 static const int CODE_POINT_NULL; 148}; 149} // namespace latinime 150#endif // LATINIME_JNI_DATA_UTILS_H 151