1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Matcher"
18
19#include <memory>
20#include <stdlib.h>
21
22#include <android-base/logging.h>
23
24#include "IcuUtilities.h"
25#include "JNIHelp.h"
26#include "JniConstants.h"
27#include "JniException.h"
28#include "ScopedJavaUnicodeString.h"
29#include "ScopedPrimitiveArray.h"
30#include "ScopedStringChars.h"
31#include "jni.h"
32#include "unicode/parseerr.h"
33#include "unicode/regex.h"
34
35// ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html
36
37/**
38 * Encapsulates an instance of ICU4C's RegexMatcher class along with a copy of
39 * the input it's currently operating on in the native heap.
40 *
41 * Rationale: We choose to make a copy here because it turns out to be a lot
42 * cheaper when a moving GC and/or string compression is enabled. This is
43 * because env->GetStringChars() always copies in this scenario. This becomes
44 * especially bad when the String in question is long and/or contains a large
45 * number of matches.
46 *
47 * Drawbacks: The native allocation associated with this class is no longer
48 * fixed size, so we're effectively lying to the NativeAllocationRegistry about
49 * the size of the object(s) we're allocating on the native heap. The peak
50 * memory usage doesn't change though, given that GetStringChars would have
51 * made an allocation of precisely the same size.
52 */
53class MatcherState {
54public:
55    MatcherState(icu::RegexMatcher* matcher) :
56        mMatcher(matcher),
57        mUChars(nullptr),
58        mUText(nullptr),
59        mStatus(U_ZERO_ERROR) {
60    }
61
62    bool updateInput(JNIEnv* env, jstring input) {
63        // First, close the UText struct, since we're about to allocate a new one.
64        if (mUText != nullptr) {
65            utext_close(mUText);
66            mUText = nullptr;
67        }
68
69        // Then delete the UChar* associated with the UText struct..
70        mUChars.reset(nullptr);
71
72        // TODO: We should investigate whether we can avoid an additional copy
73        // in the native heap when is_copy == JNI_TRUE. The problem with doing
74        // that is that we might call ReleaseStringChars with a different
75        // JNIEnv* on a different downcall. This is currently safe as
76        // implemented in ART, but is unlikely to be portable and the spec stays
77        // silent on the matter.
78        ScopedStringChars inputChars(env, input);
79        if (inputChars.get() == nullptr) {
80            // There will be an exception pending if we get here.
81            return false;
82        }
83
84        // Make a copy of |input| on the native heap. This copy will be live
85        // until the next call to updateInput or close.
86        mUChars.reset(new (std::nothrow) UChar[inputChars.size()]);
87        if (mUChars.get() == nullptr) {
88            env->ThrowNew(env->FindClass("Ljava/lang/OutOfMemoryError;"), "Out of memory");
89            return false;
90        }
91
92        static_assert(sizeof(UChar) == sizeof(jchar), "sizeof(Uchar) != sizeof(jchar)");
93        memcpy(mUChars.get(), inputChars.get(), inputChars.size() * sizeof(jchar));
94
95        // Reset any errors that might have occurred on previous patches.
96        mStatus = U_ZERO_ERROR;
97        mUText = utext_openUChars(nullptr, mUChars.get(), inputChars.size(), &mStatus);
98        if (mUText == nullptr) {
99            CHECK(maybeThrowIcuException(env, "utext_openUChars", mStatus));
100            return false;
101        }
102
103        // It is an error for ICU to have returned a non-null mUText but to
104        // still have indicated an error.
105        CHECK(U_SUCCESS(mStatus));
106
107        mMatcher->reset(mUText);
108        return true;
109    }
110
111    ~MatcherState() {
112        if (mUText != nullptr) {
113            utext_close(mUText);
114        }
115    }
116
117    icu::RegexMatcher* matcher() {
118        return mMatcher.get();
119    }
120
121    UErrorCode& status() {
122        return mStatus;
123    }
124
125    void updateOffsets(JNIEnv* env, jintArray javaOffsets) {
126        ScopedIntArrayRW offsets(env, javaOffsets);
127        if (offsets.get() == NULL) {
128            return;
129        }
130
131        for (size_t i = 0, groupCount = mMatcher->groupCount(); i <= groupCount; ++i) {
132            offsets[2*i + 0] = mMatcher->start(i, mStatus);
133            offsets[2*i + 1] = mMatcher->end(i, mStatus);
134        }
135    }
136
137private:
138    std::unique_ptr<icu::RegexMatcher> mMatcher;
139    std::unique_ptr<UChar[]> mUChars;
140    UText* mUText;
141    UErrorCode mStatus;
142
143    // Disallow copy and assignment.
144    MatcherState(const MatcherState&);
145    void operator=(const MatcherState&);
146};
147
148static inline MatcherState* toMatcherState(jlong address) {
149    return reinterpret_cast<MatcherState*>(static_cast<uintptr_t>(address));
150}
151
152static void Matcher_free(void* address) {
153    MatcherState* state = reinterpret_cast<MatcherState*>(address);
154    delete state;
155}
156
157static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) {
158    return reinterpret_cast<jlong>(&Matcher_free);
159}
160
161// Return a guess of the amount of native memory to be deallocated by a typical call to
162// Matcher_free().
163static jint Matcher_nativeSize(JNIEnv*, jclass) {
164    return 200;  // Very rough guess based on a quick look at the implementation.
165}
166
167static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jint startIndex, jintArray offsets) {
168    MatcherState* state = toMatcherState(addr);
169    UBool result = state->matcher()->find(startIndex, state->status());
170    if (result) {
171        state->updateOffsets(env, offsets);
172    }
173    return result;
174}
175
176static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
177    MatcherState* state = toMatcherState(addr);
178    UBool result = state->matcher()->find();
179    if (result) {
180        state->updateOffsets(env, offsets);
181    }
182    return result;
183}
184
185static jint Matcher_groupCountImpl(JNIEnv*, jclass, jlong addr) {
186    MatcherState* state = toMatcherState(addr);
187    return state->matcher()->groupCount();
188}
189
190static jint Matcher_hitEndImpl(JNIEnv*, jclass, jlong addr) {
191    MatcherState* state = toMatcherState(addr);
192    return state->matcher()->hitEnd();
193}
194
195static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
196    MatcherState* state = toMatcherState(addr);
197    UBool result = state->matcher()->lookingAt(state->status());
198    if (result) {
199        state->updateOffsets(env, offsets);
200    }
201    return result;
202}
203
204static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
205    MatcherState* state = toMatcherState(addr);
206    UBool result = state->matcher()->matches(state->status());
207    if (result) {
208        state->updateOffsets(env, offsets);
209    }
210    return result;
211}
212
213static jlong Matcher_openImpl(JNIEnv* env, jclass, jlong patternAddr) {
214    icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
215    UErrorCode status = U_ZERO_ERROR;
216    icu::RegexMatcher* result = pattern->matcher(status);
217    if (maybeThrowIcuException(env, "RegexPattern::matcher", status)) {
218        return 0;
219    }
220
221    return reinterpret_cast<uintptr_t>(new MatcherState(result));
222}
223
224static jint Matcher_requireEndImpl(JNIEnv*, jclass, jlong addr) {
225    MatcherState* state = toMatcherState(addr);
226    return state->matcher()->requireEnd();
227}
228
229static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) {
230    MatcherState* state = toMatcherState(addr);
231    if (state->updateInput(env, javaText)) {
232        state->matcher()->region(start, end, state->status());
233    }
234}
235
236static void Matcher_useAnchoringBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
237    MatcherState* state = toMatcherState(addr);
238    state->matcher()->useAnchoringBounds(value);
239}
240
241static void Matcher_useTransparentBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
242    MatcherState* state = toMatcherState(addr);
243    state->matcher()->useTransparentBounds(value);
244}
245
246static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr, jstring javaGroupName) {
247  icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
248  ScopedJavaUnicodeString groupName(env, javaGroupName);
249  UErrorCode status = U_ZERO_ERROR;
250
251  jint result = pattern->groupNumberFromName(groupName.unicodeString(), status);
252  if (U_SUCCESS(status)) {
253    return result;
254  }
255  if (status == U_REGEX_INVALID_CAPTURE_GROUP_NAME) {
256    return -1;
257  }
258  maybeThrowIcuException(env, "RegexPattern::groupNumberFromName", status);
259  return -1;
260}
261
262
263static JNINativeMethod gMethods[] = {
264    NATIVE_METHOD(Matcher, getMatchedGroupIndex0, "(JLjava/lang/String;)I"),
265    NATIVE_METHOD(Matcher, findImpl, "(JI[I)Z"),
266    NATIVE_METHOD(Matcher, findNextImpl, "(J[I)Z"),
267    NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"),
268    NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"),
269    NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"),
270    NATIVE_METHOD(Matcher, lookingAtImpl, "(J[I)Z"),
271    NATIVE_METHOD(Matcher, matchesImpl, "(J[I)Z"),
272    NATIVE_METHOD(Matcher, nativeSize, "()I"),
273    NATIVE_METHOD(Matcher, openImpl, "(J)J"),
274    NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"),
275    NATIVE_METHOD(Matcher, setInputImpl, "(JLjava/lang/String;II)V"),
276    NATIVE_METHOD(Matcher, useAnchoringBoundsImpl, "(JZ)V"),
277    NATIVE_METHOD(Matcher, useTransparentBoundsImpl, "(JZ)V"),
278};
279void register_java_util_regex_Matcher(JNIEnv* env) {
280    jniRegisterNativeMethods(env, "java/util/regex/Matcher", gMethods, NELEM(gMethods));
281}
282