1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#define LOG_TAG "Pattern" 18 19#include <stdlib.h> 20 21#include "JNIHelp.h" 22#include "JniConstants.h" 23#include "ScopedJavaUnicodeString.h" 24#include "jni.h" 25#include "unicode/parseerr.h" 26#include "unicode/regex.h" 27 28// ICU documentation: http://icu-project.org/apiref/icu4c/classRegexPattern.html 29 30static const char* regexDetailMessage(UErrorCode status) { 31 // These human-readable error messages were culled from "utypes.h", and then slightly tuned 32 // to make more sense in context. 33 // If we don't have a special-case, we'll just return the textual name of 34 // the enum value (such as U_REGEX_RULE_SYNTAX), which is better than nothing. 35 switch (status) { 36 case U_REGEX_INTERNAL_ERROR: return "An internal error was detected"; 37 case U_REGEX_RULE_SYNTAX: return "Syntax error in regexp pattern"; 38 case U_REGEX_INVALID_STATE: return "Matcher in invalid state for requested operation"; 39 case U_REGEX_BAD_ESCAPE_SEQUENCE: return "Unrecognized backslash escape sequence in pattern"; 40 case U_REGEX_PROPERTY_SYNTAX: return "Incorrect Unicode property"; 41 case U_REGEX_UNIMPLEMENTED: return "Use of unimplemented feature"; 42 case U_REGEX_MISMATCHED_PAREN: return "Incorrectly nested parentheses in regexp pattern"; 43 case U_REGEX_NUMBER_TOO_BIG: return "Decimal number is too large"; 44 case U_REGEX_BAD_INTERVAL: return "Error in {min,max} interval"; 45 case U_REGEX_MAX_LT_MIN: return "In {min,max}, max is less than min"; 46 case U_REGEX_INVALID_BACK_REF: return "Back-reference to a non-existent capture group"; 47 case U_REGEX_INVALID_FLAG: return "Invalid value for match mode flags"; 48 case U_REGEX_LOOK_BEHIND_LIMIT: return "Look-behind pattern matches must have a bounded maximum length"; 49 case U_REGEX_SET_CONTAINS_STRING: return "Regular expressions cannot have UnicodeSets containing strings"; 50 case U_REGEX_OCTAL_TOO_BIG: return "Octal character constants must be <= 0377."; 51 case U_REGEX_MISSING_CLOSE_BRACKET: return "Missing closing bracket in character class"; 52 case U_REGEX_INVALID_RANGE: return "In a character range [x-y], x is greater than y"; 53 case U_REGEX_STACK_OVERFLOW: return "Regular expression backtrack stack overflow"; 54 case U_REGEX_TIME_OUT: return "Maximum allowed match time exceeded"; 55 case U_REGEX_STOPPED_BY_CALLER: return "Matching operation aborted by user callback function"; 56 default: 57 return u_errorName(status); 58 } 59} 60 61static void throwPatternSyntaxException(JNIEnv* env, UErrorCode status, jstring pattern, UParseError error) { 62 static jmethodID method = env->GetMethodID(JniConstants::patternSyntaxExceptionClass, 63 "<init>", "(Ljava/lang/String;Ljava/lang/String;I)V"); 64 jstring message = env->NewStringUTF(regexDetailMessage(status)); 65 jclass exceptionClass = JniConstants::patternSyntaxExceptionClass; 66 jobject exception = env->NewObject(exceptionClass, method, message, pattern, error.offset); 67 env->Throw(reinterpret_cast<jthrowable>(exception)); 68} 69 70static void Pattern_free(void* addr) { 71 delete reinterpret_cast<icu::RegexPattern*>(addr); 72} 73 74static jlong Pattern_getNativeFinalizer(JNIEnv*, jclass) { 75 return reinterpret_cast<jlong>(&Pattern_free); 76} 77 78// Return a guess of the amount of native memory to be deallocated by a typical call to 79// Pattern_free(). 80static jint Pattern_nativeSize(JNIEnv*, jclass) { 81 return 500; // Very rough guess based on a quick look at the implementation. 82} 83 84static jlong Pattern_compileImpl(JNIEnv* env, jclass, jstring javaRegex, jint flags) { 85 flags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; 86 87 UErrorCode status = U_ZERO_ERROR; 88 UParseError error; 89 error.offset = -1; 90 91 ScopedJavaUnicodeString regex(env, javaRegex); 92 if (!regex.valid()) { 93 return 0; 94 } 95 icu::UnicodeString& regexString(regex.unicodeString()); 96 icu::RegexPattern* result = icu::RegexPattern::compile(regexString, flags, error, status); 97 if (!U_SUCCESS(status)) { 98 throwPatternSyntaxException(env, status, javaRegex, error); 99 } 100 return static_cast<jlong>(reinterpret_cast<uintptr_t>(result)); 101} 102 103static JNINativeMethod gMethods[] = { 104 NATIVE_METHOD(Pattern, compileImpl, "(Ljava/lang/String;I)J"), 105 NATIVE_METHOD(Pattern, getNativeFinalizer, "()J"), 106 NATIVE_METHOD(Pattern, nativeSize, "()I"), 107}; 108 109void register_java_util_regex_Pattern(JNIEnv* env) { 110 jniRegisterNativeMethods(env, "java/util/regex/Pattern", gMethods, NELEM(gMethods)); 111} 112