1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Matcher"
18
19#include <stdlib.h>
20
21#include "IcuUtilities.h"
22#include "JNIHelp.h"
23#include "JniConstants.h"
24#include "JniException.h"
25#include "ScopedPrimitiveArray.h"
26#include "jni.h"
27#include "unicode/parseerr.h"
28#include "unicode/regex.h"
29
30// ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html
31
32static icu::RegexMatcher* toRegexMatcher(jlong address) {
33    return reinterpret_cast<icu::RegexMatcher*>(static_cast<uintptr_t>(address));
34}
35
36/**
37 * We use ICU4C's RegexMatcher class, but our input is on the Java heap and potentially moving
38 * around between calls. This wrapper class ensures that our RegexMatcher is always pointing at
39 * the current location of the char[]. Earlier versions of Android simply copied the data to the
40 * native heap, but that's wasteful and hides allocations from the garbage collector.
41 */
42class MatcherAccessor {
43public:
44    MatcherAccessor(JNIEnv* env, jlong address, jstring javaInput, bool reset) {
45        init(env, address);
46
47        mJavaInput = javaInput;
48        mChars = env->GetStringChars(mJavaInput, NULL);
49        if (mChars == NULL) {
50            return;
51        }
52
53        mUText = utext_openUChars(NULL, mChars, env->GetStringLength(mJavaInput), &mStatus);
54        if (mUText == NULL) {
55            return;
56        }
57
58        if (reset) {
59            mMatcher->reset(mUText);
60        } else {
61            mMatcher->refreshInputText(mUText, mStatus);
62        }
63    }
64
65    MatcherAccessor(JNIEnv* env, jlong address) {
66        init(env, address);
67    }
68
69    ~MatcherAccessor() {
70        utext_close(mUText);
71        if (mJavaInput) {
72            mEnv->ReleaseStringChars(mJavaInput, mChars);
73        }
74        maybeThrowIcuException(mEnv, "utext_close", mStatus);
75    }
76
77    icu::RegexMatcher* operator->() {
78        return mMatcher;
79    }
80
81    UErrorCode& status() {
82        return mStatus;
83    }
84
85    void updateOffsets(jintArray javaOffsets) {
86        ScopedIntArrayRW offsets(mEnv, javaOffsets);
87        if (offsets.get() == NULL) {
88            return;
89        }
90
91        for (size_t i = 0, groupCount = mMatcher->groupCount(); i <= groupCount; ++i) {
92            offsets[2*i + 0] = mMatcher->start(i, mStatus);
93            offsets[2*i + 1] = mMatcher->end(i, mStatus);
94        }
95    }
96
97private:
98    void init(JNIEnv* env, jlong address) {
99        mEnv = env;
100        mJavaInput = NULL;
101        mMatcher = toRegexMatcher(address);
102        mChars = NULL;
103        mStatus = U_ZERO_ERROR;
104        mUText = NULL;
105    }
106
107    JNIEnv* mEnv;
108    jstring mJavaInput;
109    icu::RegexMatcher* mMatcher;
110    const jchar* mChars;
111    UErrorCode mStatus;
112    UText* mUText;
113
114    // Disallow copy and assignment.
115    MatcherAccessor(const MatcherAccessor&);
116    void operator=(const MatcherAccessor&);
117};
118
119static void Matcher_free(void* address) {
120    delete reinterpret_cast<icu::RegexMatcher*>(address);
121}
122
123static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) {
124    return reinterpret_cast<jlong>(&Matcher_free);
125}
126
127// Return a guess of the amount of native memory to be deallocated by a typical call to
128// Matcher_free().
129static jint Matcher_nativeSize(JNIEnv*, jclass) {
130    return 200;  // Very rough guess based on a quick look at the implementation.
131}
132
133static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint startIndex, jintArray offsets) {
134    MatcherAccessor matcher(env, addr, javaText, false);
135    UBool result = matcher->find(startIndex, matcher.status());
136    if (result) {
137        matcher.updateOffsets(offsets);
138    }
139    return result;
140}
141
142static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) {
143    MatcherAccessor matcher(env, addr, javaText, false);
144    if (matcher.status() != U_ZERO_ERROR) {
145        return -1;
146    }
147    UBool result = matcher->find();
148    if (result) {
149        matcher.updateOffsets(offsets);
150    }
151    return result;
152}
153
154static jint Matcher_groupCountImpl(JNIEnv* env, jclass, jlong addr) {
155    MatcherAccessor matcher(env, addr);
156    return matcher->groupCount();
157}
158
159static jint Matcher_hitEndImpl(JNIEnv* env, jclass, jlong addr) {
160    MatcherAccessor matcher(env, addr);
161    return matcher->hitEnd();
162}
163
164static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) {
165    MatcherAccessor matcher(env, addr, javaText, false);
166    UBool result = matcher->lookingAt(matcher.status());
167    if (result) {
168        matcher.updateOffsets(offsets);
169    }
170    return result;
171}
172
173static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) {
174    MatcherAccessor matcher(env, addr, javaText, false);
175    UBool result = matcher->matches(matcher.status());
176    if (result) {
177        matcher.updateOffsets(offsets);
178    }
179    return result;
180}
181
182static jlong Matcher_openImpl(JNIEnv* env, jclass, jlong patternAddr) {
183    icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
184    UErrorCode status = U_ZERO_ERROR;
185    icu::RegexMatcher* result = pattern->matcher(status);
186    maybeThrowIcuException(env, "RegexPattern::matcher", status);
187    return reinterpret_cast<uintptr_t>(result);
188}
189
190static jint Matcher_requireEndImpl(JNIEnv* env, jclass, jlong addr) {
191    MatcherAccessor matcher(env, addr);
192    return matcher->requireEnd();
193}
194
195static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) {
196    MatcherAccessor matcher(env, addr, javaText, true);
197    matcher->region(start, end, matcher.status());
198}
199
200static void Matcher_useAnchoringBoundsImpl(JNIEnv* env, jclass, jlong addr, jboolean value) {
201    MatcherAccessor matcher(env, addr);
202    matcher->useAnchoringBounds(value);
203}
204
205static void Matcher_useTransparentBoundsImpl(JNIEnv* env, jclass, jlong addr, jboolean value) {
206    MatcherAccessor matcher(env, addr);
207    matcher->useTransparentBounds(value);
208}
209
210static JNINativeMethod gMethods[] = {
211    NATIVE_METHOD(Matcher, findImpl, "(JLjava/lang/String;I[I)Z"),
212    NATIVE_METHOD(Matcher, findNextImpl, "(JLjava/lang/String;[I)Z"),
213    NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"),
214    NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"),
215    NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"),
216    NATIVE_METHOD(Matcher, lookingAtImpl, "(JLjava/lang/String;[I)Z"),
217    NATIVE_METHOD(Matcher, matchesImpl, "(JLjava/lang/String;[I)Z"),
218    NATIVE_METHOD(Matcher, nativeSize, "()I"),
219    NATIVE_METHOD(Matcher, openImpl, "(J)J"),
220    NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"),
221    NATIVE_METHOD(Matcher, setInputImpl, "(JLjava/lang/String;II)V"),
222    NATIVE_METHOD(Matcher, useAnchoringBoundsImpl, "(JZ)V"),
223    NATIVE_METHOD(Matcher, useTransparentBoundsImpl, "(JZ)V"),
224};
225void register_java_util_regex_Matcher(JNIEnv* env) {
226    jniRegisterNativeMethods(env, "java/util/regex/Matcher", gMethods, NELEM(gMethods));
227}
228