1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define LOG_TAG "Matcher" 18 19#include <memory> 20#include <stdlib.h> 21 22#include <android-base/logging.h> 23 24#include "IcuUtilities.h" 25#include "JNIHelp.h" 26#include "JniConstants.h" 27#include "JniException.h" 28#include "ScopedJavaUnicodeString.h" 29#include "ScopedPrimitiveArray.h" 30#include "ScopedStringChars.h" 31#include "jni.h" 32#include "unicode/parseerr.h" 33#include "unicode/regex.h" 34 35// ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html 36 37/** 38 * Encapsulates an instance of ICU4C's RegexMatcher class along with a copy of 39 * the input it's currently operating on in the native heap. 40 * 41 * Rationale: We choose to make a copy here because it turns out to be a lot 42 * cheaper when a moving GC and/or string compression is enabled. This is 43 * because env->GetStringChars() always copies in this scenario. This becomes 44 * especially bad when the String in question is long and/or contains a large 45 * number of matches. 46 * 47 * Drawbacks: The native allocation associated with this class is no longer 48 * fixed size, so we're effectively lying to the NativeAllocationRegistry about 49 * the size of the object(s) we're allocating on the native heap. The peak 50 * memory usage doesn't change though, given that GetStringChars would have 51 * made an allocation of precisely the same size. 52 */ 53class MatcherState { 54public: 55 MatcherState(icu::RegexMatcher* matcher) : 56 mMatcher(matcher), 57 mUChars(nullptr), 58 mUText(nullptr), 59 mStatus(U_ZERO_ERROR) { 60 } 61 62 bool updateInput(JNIEnv* env, jstring input) { 63 // First, close the UText struct, since we're about to allocate a new one. 64 if (mUText != nullptr) { 65 utext_close(mUText); 66 mUText = nullptr; 67 } 68 69 // Then delete the UChar* associated with the UText struct.. 70 mUChars.reset(nullptr); 71 72 // TODO: We should investigate whether we can avoid an additional copy 73 // in the native heap when is_copy == JNI_TRUE. The problem with doing 74 // that is that we might call ReleaseStringChars with a different 75 // JNIEnv* on a different downcall. This is currently safe as 76 // implemented in ART, but is unlikely to be portable and the spec stays 77 // silent on the matter. 78 ScopedStringChars inputChars(env, input); 79 if (inputChars.get() == nullptr) { 80 // There will be an exception pending if we get here. 81 return false; 82 } 83 84 // Make a copy of |input| on the native heap. This copy will be live 85 // until the next call to updateInput or close. 86 mUChars.reset(new (std::nothrow) UChar[inputChars.size()]); 87 if (mUChars.get() == nullptr) { 88 env->ThrowNew(env->FindClass("Ljava/lang/OutOfMemoryError;"), "Out of memory"); 89 return false; 90 } 91 92 static_assert(sizeof(UChar) == sizeof(jchar), "sizeof(Uchar) != sizeof(jchar)"); 93 memcpy(mUChars.get(), inputChars.get(), inputChars.size() * sizeof(jchar)); 94 95 // Reset any errors that might have occurred on previous patches. 96 mStatus = U_ZERO_ERROR; 97 mUText = utext_openUChars(nullptr, mUChars.get(), inputChars.size(), &mStatus); 98 if (mUText == nullptr) { 99 CHECK(maybeThrowIcuException(env, "utext_openUChars", mStatus)); 100 return false; 101 } 102 103 // It is an error for ICU to have returned a non-null mUText but to 104 // still have indicated an error. 105 CHECK(U_SUCCESS(mStatus)); 106 107 mMatcher->reset(mUText); 108 return true; 109 } 110 111 ~MatcherState() { 112 if (mUText != nullptr) { 113 utext_close(mUText); 114 } 115 } 116 117 icu::RegexMatcher* matcher() { 118 return mMatcher.get(); 119 } 120 121 UErrorCode& status() { 122 return mStatus; 123 } 124 125 void updateOffsets(JNIEnv* env, jintArray javaOffsets) { 126 ScopedIntArrayRW offsets(env, javaOffsets); 127 if (offsets.get() == NULL) { 128 return; 129 } 130 131 for (size_t i = 0, groupCount = mMatcher->groupCount(); i <= groupCount; ++i) { 132 offsets[2*i + 0] = mMatcher->start(i, mStatus); 133 offsets[2*i + 1] = mMatcher->end(i, mStatus); 134 } 135 } 136 137private: 138 std::unique_ptr<icu::RegexMatcher> mMatcher; 139 std::unique_ptr<UChar[]> mUChars; 140 UText* mUText; 141 UErrorCode mStatus; 142 143 // Disallow copy and assignment. 144 MatcherState(const MatcherState&); 145 void operator=(const MatcherState&); 146}; 147 148static inline MatcherState* toMatcherState(jlong address) { 149 return reinterpret_cast<MatcherState*>(static_cast<uintptr_t>(address)); 150} 151 152static void Matcher_free(void* address) { 153 MatcherState* state = reinterpret_cast<MatcherState*>(address); 154 delete state; 155} 156 157static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) { 158 return reinterpret_cast<jlong>(&Matcher_free); 159} 160 161// Return a guess of the amount of native memory to be deallocated by a typical call to 162// Matcher_free(). 163static jint Matcher_nativeSize(JNIEnv*, jclass) { 164 return 200; // Very rough guess based on a quick look at the implementation. 165} 166 167static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jint startIndex, jintArray offsets) { 168 MatcherState* state = toMatcherState(addr); 169 UBool result = state->matcher()->find(startIndex, state->status()); 170 if (result) { 171 state->updateOffsets(env, offsets); 172 } 173 return result; 174} 175 176static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) { 177 MatcherState* state = toMatcherState(addr); 178 UBool result = state->matcher()->find(); 179 if (result) { 180 state->updateOffsets(env, offsets); 181 } 182 return result; 183} 184 185static jint Matcher_groupCountImpl(JNIEnv*, jclass, jlong addr) { 186 MatcherState* state = toMatcherState(addr); 187 return state->matcher()->groupCount(); 188} 189 190static jint Matcher_hitEndImpl(JNIEnv*, jclass, jlong addr) { 191 MatcherState* state = toMatcherState(addr); 192 return state->matcher()->hitEnd(); 193} 194 195static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) { 196 MatcherState* state = toMatcherState(addr); 197 UBool result = state->matcher()->lookingAt(state->status()); 198 if (result) { 199 state->updateOffsets(env, offsets); 200 } 201 return result; 202} 203 204static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) { 205 MatcherState* state = toMatcherState(addr); 206 UBool result = state->matcher()->matches(state->status()); 207 if (result) { 208 state->updateOffsets(env, offsets); 209 } 210 return result; 211} 212 213static jlong Matcher_openImpl(JNIEnv* env, jclass, jlong patternAddr) { 214 icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr)); 215 UErrorCode status = U_ZERO_ERROR; 216 icu::RegexMatcher* result = pattern->matcher(status); 217 if (maybeThrowIcuException(env, "RegexPattern::matcher", status)) { 218 return 0; 219 } 220 221 return reinterpret_cast<uintptr_t>(new MatcherState(result)); 222} 223 224static jint Matcher_requireEndImpl(JNIEnv*, jclass, jlong addr) { 225 MatcherState* state = toMatcherState(addr); 226 return state->matcher()->requireEnd(); 227} 228 229static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) { 230 MatcherState* state = toMatcherState(addr); 231 if (state->updateInput(env, javaText)) { 232 state->matcher()->region(start, end, state->status()); 233 } 234} 235 236static void Matcher_useAnchoringBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) { 237 MatcherState* state = toMatcherState(addr); 238 state->matcher()->useAnchoringBounds(value); 239} 240 241static void Matcher_useTransparentBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) { 242 MatcherState* state = toMatcherState(addr); 243 state->matcher()->useTransparentBounds(value); 244} 245 246static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr, jstring javaGroupName) { 247 icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr)); 248 ScopedJavaUnicodeString groupName(env, javaGroupName); 249 UErrorCode status = U_ZERO_ERROR; 250 251 jint result = pattern->groupNumberFromName(groupName.unicodeString(), status); 252 if (U_SUCCESS(status)) { 253 return result; 254 } 255 if (status == U_REGEX_INVALID_CAPTURE_GROUP_NAME) { 256 return -1; 257 } 258 maybeThrowIcuException(env, "RegexPattern::groupNumberFromName", status); 259 return -1; 260} 261 262 263static JNINativeMethod gMethods[] = { 264 NATIVE_METHOD(Matcher, getMatchedGroupIndex0, "(JLjava/lang/String;)I"), 265 NATIVE_METHOD(Matcher, findImpl, "(JI[I)Z"), 266 NATIVE_METHOD(Matcher, findNextImpl, "(J[I)Z"), 267 NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"), 268 NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"), 269 NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"), 270 NATIVE_METHOD(Matcher, lookingAtImpl, "(J[I)Z"), 271 NATIVE_METHOD(Matcher, matchesImpl, "(J[I)Z"), 272 NATIVE_METHOD(Matcher, nativeSize, "()I"), 273 NATIVE_METHOD(Matcher, openImpl, "(J)J"), 274 NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"), 275 NATIVE_METHOD(Matcher, setInputImpl, "(JLjava/lang/String;II)V"), 276 NATIVE_METHOD(Matcher, useAnchoringBoundsImpl, "(JZ)V"), 277 NATIVE_METHOD(Matcher, useTransparentBoundsImpl, "(JZ)V"), 278}; 279void register_java_util_regex_Matcher(JNIEnv* env) { 280 jniRegisterNativeMethods(env, "java/util/regex/Matcher", gMethods, NELEM(gMethods)); 281} 282