19066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project/*
29066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * Copyright (C) 2008 The Android Open Source Project
39066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project *
49066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * Licensed under the Apache License, Version 2.0 (the "License");
59066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * you may not use this file except in compliance with the License.
69066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * You may obtain a copy of the License at
79066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project *
89066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project *      http://www.apache.org/licenses/LICENSE-2.0
99066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project *
109066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * Unless required by applicable law or agreed to in writing, software
119066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * distributed under the License is distributed on an "AS IS" BASIS,
129066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
139066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * See the License for the specific language governing permissions and
149066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * limitations under the License.
159066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project */
169066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
179066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Projectpackage android.os;
189066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
192baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumannimport android.util.Log;
202baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
212baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumannimport java.util.Arrays;
222baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
239066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project/**
249066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * A simple pattern matcher, which is safe to use on untrusted data: it does
259066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * not provide full reg-exp support, only simple globbing that can not be
269066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project * used maliciously.
279066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project */
289066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Projectpublic class PatternMatcher implements Parcelable {
299066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    /**
309066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * Pattern type: the given pattern must exactly match the string it is
319066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * tested against.
329066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     */
339066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public static final int PATTERN_LITERAL = 0;
349066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
359066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    /**
369066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * Pattern type: the given pattern must match the
379066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * beginning of the string it is tested against.
389066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     */
399066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public static final int PATTERN_PREFIX = 1;
409066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
419066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    /**
429066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * Pattern type: the given pattern is interpreted with a
439066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * simple glob syntax for matching against the string it is tested against.
449066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * In this syntax, you can use the '*' character to match against zero or
459066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * more occurrences of the character immediately before.  If the
469066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * character before it is '.' it will match any character.  The character
479066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * '\' can be used as an escape.  This essentially provides only the '*'
489066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     * wildcard part of a normal regexp.
499066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project     */
509066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public static final int PATTERN_SIMPLE_GLOB = 2;
512baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
522baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    /**
532baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     * Pattern type: the given pattern is interpreted with a regular
542baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     * expression-like syntax for matching against the string it is tested
552baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
562baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     * with full support for character ranges and the not ({@code ^}) modifier.
572baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
582baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     * for one-or-more and full range ({@code {...}}) support. This is a simple
599c9fdf27ff74fd4011239b25723f6c05ee5886faTodd Kennedy     * evaluation implementation in which matching is done against the pattern in
609c9fdf27ff74fd4011239b25723f6c05ee5886faTodd Kennedy     * real time with no backtracking support.
612baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     */
622baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    public static final int PATTERN_ADVANCED_GLOB = 3;
632baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
642baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    // token types for advanced matching
652baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int TOKEN_TYPE_LITERAL = 0;
662baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int TOKEN_TYPE_ANY = 1;
672baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int TOKEN_TYPE_SET = 2;
682baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int TOKEN_TYPE_INVERSE_SET = 3;
692baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
702baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    // Return for no match
712baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int NO_MATCH = -1;
722baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
732baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final String TAG = "PatternMatcher";
742baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
752baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    // Parsed placeholders for advanced patterns
762baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int PARSED_TOKEN_CHAR_SET_START = -1;
772baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
782baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
792baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int PARSED_TOKEN_CHAR_ANY = -4;
802baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int PARSED_MODIFIER_RANGE_START = -5;
812baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int PARSED_MODIFIER_RANGE_STOP = -6;
822baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
832baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
842baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
859066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    private final String mPattern;
869066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    private final int mType;
872baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private final int[] mParsedPattern;
882baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
892baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
902baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int MAX_PATTERN_STORAGE = 2048;
912baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    // workspace to use for building a parsed advanced pattern;
922baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
932baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
949066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public PatternMatcher(String pattern, int type) {
959066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        mPattern = pattern;
969066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        mType = type;
972baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        if (mType == PATTERN_ADVANCED_GLOB) {
982baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
992baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        } else {
1002baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            mParsedPattern = null;
1012baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        }
1029066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    }
1039066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1049066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public final String getPath() {
1059066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        return mPattern;
1069066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    }
1079066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1089066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public final int getType() {
1099066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        return mType;
1109066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    }
1119066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1129066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public boolean match(String str) {
1132baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        return matchPattern(str, mPattern, mParsedPattern, mType);
1149066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    }
1159066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1169066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public String toString() {
1179066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        String type = "? ";
1189066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        switch (mType) {
1199066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            case PATTERN_LITERAL:
1209066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                type = "LITERAL: ";
1219066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                break;
1229066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            case PATTERN_PREFIX:
1239066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                type = "PREFIX: ";
1249066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                break;
1259066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            case PATTERN_SIMPLE_GLOB:
1269066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                type = "GLOB: ";
1279066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                break;
1282baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            case PATTERN_ADVANCED_GLOB:
1292baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                type = "ADVANCED: ";
1302baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                break;
1319066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        }
1329066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        return "PatternMatcher{" + type + mPattern + "}";
1339066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    }
1349066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1359066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public int describeContents() {
1369066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        return 0;
1379066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    }
1389066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1399066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public void writeToParcel(Parcel dest, int flags) {
1409066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        dest.writeString(mPattern);
1419066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        dest.writeInt(mType);
1422baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        dest.writeIntArray(mParsedPattern);
1439066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    }
1449066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1459066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public PatternMatcher(Parcel src) {
1469066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        mPattern = src.readString();
1479066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        mType = src.readInt();
1482baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        mParsedPattern = src.createIntArray();
1499066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    }
1509066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1519066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    public static final Parcelable.Creator<PatternMatcher> CREATOR
1529066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            = new Parcelable.Creator<PatternMatcher>() {
1539066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        public PatternMatcher createFromParcel(Parcel source) {
1549066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            return new PatternMatcher(source);
1559066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        }
1569066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1579066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        public PatternMatcher[] newArray(int size) {
1589066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            return new PatternMatcher[size];
1599066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        }
1609066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    };
1619066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
1622baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
1639066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        if (match == null) return false;
1649066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        if (type == PATTERN_LITERAL) {
1659066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            return pattern.equals(match);
1669066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        } if (type == PATTERN_PREFIX) {
1679066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            return match.startsWith(pattern);
1682baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        } else if (type == PATTERN_SIMPLE_GLOB) {
1692baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            return matchGlobPattern(pattern, match);
1702baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        } else if (type == PATTERN_ADVANCED_GLOB) {
1712baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            return matchAdvancedPattern(parsedPattern, match);
1729066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        }
1732baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        return false;
1742baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    }
1752baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
1762baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    static boolean matchGlobPattern(String pattern, String match) {
1779066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        final int NP = pattern.length();
1789066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        if (NP <= 0) {
1799066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            return match.length() <= 0;
1809066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        }
1819066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        final int NM = match.length();
1829066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        int ip = 0, im = 0;
1839066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        char nextChar = pattern.charAt(0);
1849066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        while ((ip<NP) && (im<NM)) {
1859066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            char c = nextChar;
1869066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            ip++;
1879066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            nextChar = ip < NP ? pattern.charAt(ip) : 0;
1889066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            final boolean escaped = (c == '\\');
1899066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            if (escaped) {
1909066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                c = nextChar;
1919066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                ip++;
1929066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                nextChar = ip < NP ? pattern.charAt(ip) : 0;
1939066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            }
1949066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            if (nextChar == '*') {
1959066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                if (!escaped && c == '.') {
1969066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    if (ip >= (NP-1)) {
1979066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        // at the end with a pattern match, so
1989066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        // all is good without checking!
1999066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        return true;
2009066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    }
2019066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    ip++;
2029066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    nextChar = pattern.charAt(ip);
2039066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    // Consume everything until the next character in the
2049066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    // pattern is found.
2059066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    if (nextChar == '\\') {
2069066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        ip++;
2079066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        nextChar = ip < NP ? pattern.charAt(ip) : 0;
2089066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    }
2099066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    do {
2109066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        if (match.charAt(im) == nextChar) {
2119066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                            break;
2129066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        }
2139066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        im++;
2149066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    } while (im < NM);
2159066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    if (im == NM) {
2169066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        // Whoops, the next character in the pattern didn't
2179066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        // exist in the match.
2189066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        return false;
2199066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    }
2209066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    ip++;
2219066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    nextChar = ip < NP ? pattern.charAt(ip) : 0;
2229066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    im++;
2239066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                } else {
2249066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    // Consume only characters matching the one before '*'.
2259066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    do {
2269066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        if (match.charAt(im) != c) {
2279066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                            break;
2289066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        }
2299066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                        im++;
2309066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    } while (im < NM);
2319066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    ip++;
2329066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                    nextChar = ip < NP ? pattern.charAt(ip) : 0;
2339066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                }
2349066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            } else {
2359066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                if (c != '.' && match.charAt(im) != c) return false;
2369066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project                im++;
2379066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            }
2389066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        }
2399066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
2409066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        if (ip >= NP && im >= NM) {
2419066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            // Reached the end of both strings, all is good!
2429066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            return true;
2439066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        }
2449066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
2459066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        // One last check: we may have finished the match string, but still
2469066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        // have a '.*' at the end of the pattern, which should still count
2479066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        // as a match.
2489066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        if (ip == NP-2 && pattern.charAt(ip) == '.'
2499066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            && pattern.charAt(ip+1) == '*') {
2509066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project            return true;
2519066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        }
2529066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project
2539066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project        return false;
2549066cfe9886ac131c34d59ed0e2d287b0e3c0087The Android Open Source Project    }
2552baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
2562baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    /**
2572baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     * Parses the advanced pattern and returns an integer array representation of it. The integer
2582baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     * array treats each field as a character if positive and a unique token placeholder if
2592baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     * negative. This method will throw on any pattern structure violations.
2602baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann     */
2612baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
2622baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        int ip = 0;
2632baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        final int LP = pattern.length();
2642baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
2652baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        int it = 0;
2662baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
2672baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        boolean inSet = false;
2682baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        boolean inRange = false;
2692baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        boolean inCharClass = false;
2702baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
2712baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        boolean addToParsedPattern;
2722baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
2732baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        while (ip < LP) {
2742baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            if (it > MAX_PATTERN_STORAGE - 3) {
2752baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                throw new IllegalArgumentException("Pattern is too large!");
2762baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            }
2772baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
2782baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            char c = pattern.charAt(ip);
2792baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            addToParsedPattern = false;
2802baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
2812baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            switch (c) {
2822baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case '[':
2832baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (inSet) {
2842baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        addToParsedPattern = true; // treat as literal or char class in set
2852baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    } else {
2862baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        if (pattern.charAt(ip + 1) == '^') {
2872baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
2882baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            ip++; // skip over the '^'
2892baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        } else {
2902baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
2912baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        }
2922baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        ip++; // move to the next pattern char
2932baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        inSet = true;
2942baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        continue;
2952baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
2962baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
2972baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case ']':
2982baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (!inSet) {
2992baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        addToParsedPattern = true; // treat as literal outside of set
3002baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    } else {
3012baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        int parsedToken = sParsedPatternScratch[it - 1];
3022baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
3032baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
3042baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            throw new IllegalArgumentException(
3052baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                                    "You must define characters in a set.");
3062baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        }
3072baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
3082baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        inSet = false;
3092baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        inCharClass = false;
3102baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
3112baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
3122baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case '{':
3132baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (!inSet) {
3142baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
3152baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            throw new IllegalArgumentException("Modifier must follow a token.");
3162baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        }
3172baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
3182baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        ip++;
3192baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        inRange = true;
3202baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
3212baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
3222baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case '}':
3232baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (inRange) { // only terminate the range if we're currently in one
3242baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
3252baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        inRange = false;
3262baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
3272baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
3282baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case '*':
3292baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (!inSet) {
3302baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
3312baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            throw new IllegalArgumentException("Modifier must follow a token.");
3322baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        }
3332baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
3342baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
3352baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
3362baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case '+':
3372baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (!inSet) {
3382baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
3392baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            throw new IllegalArgumentException("Modifier must follow a token.");
3402baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        }
3412baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
3422baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
3432baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
3442baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case '.':
3452baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (!inSet) {
3462baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
3472baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
3482baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
3492baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case '\\': // escape
3502baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (ip + 1 >= LP) {
3512baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        throw new IllegalArgumentException("Escape found at end of pattern!");
3522baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
3532baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    c = pattern.charAt(++ip);
3542baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    addToParsedPattern = true;
3552baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
3562baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                default:
3572baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    addToParsedPattern = true;
3582baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
3592baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            }
3602baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            if (inSet) {
3612baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                if (inCharClass) {
3622baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    sParsedPatternScratch[it++] = c;
3632baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    inCharClass = false;
3642baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                } else {
3652baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    // look forward for character class
3662baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (ip + 2 < LP
3672baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            && pattern.charAt(ip + 1) == '-'
3682baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            && pattern.charAt(ip + 2) != ']') {
3692baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        inCharClass = true;
3702baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        sParsedPatternScratch[it++] = c; // set first token as lower end of range
3712baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        ip++; // advance past dash
3722baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    } else { // literal
3732baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        sParsedPatternScratch[it++] = c; // set first token as literal
3742baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        sParsedPatternScratch[it++] = c; // set second set as literal
3752baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
3762baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                }
3772baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            } else if (inRange) {
3782baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                int endOfSet = pattern.indexOf('}', ip);
3792baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                if (endOfSet < 0) {
3802baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    throw new IllegalArgumentException("Range not ended with '}'");
3812baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                }
3822baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                String rangeString = pattern.substring(ip, endOfSet);
3832baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                int commaIndex = rangeString.indexOf(',');
3842baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                try {
3852baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    final int rangeMin;
3862baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    final int rangeMax;
3872baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (commaIndex < 0) {
3882baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        int parsedRange = Integer.parseInt(rangeString);
3892baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        rangeMin = rangeMax = parsedRange;
3902baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    } else {
3912baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
3922baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
3932baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            rangeMax = Integer.MAX_VALUE;
3942baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        } else {
3952baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
3962baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        }
3972baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
3982baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (rangeMin > rangeMax) {
3992baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        throw new IllegalArgumentException(
4002baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            "Range quantifier minimum is greater than maximum");
4012baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
4022baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    sParsedPatternScratch[it++] = rangeMin;
4032baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    sParsedPatternScratch[it++] = rangeMax;
4042baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                } catch (NumberFormatException e) {
4052baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    throw new IllegalArgumentException("Range number format incorrect", e);
4062baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                }
4072baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                ip = endOfSet;
4082baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                continue; // don't increment ip
4092baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            } else if (addToParsedPattern) {
4102baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                sParsedPatternScratch[it++] = c;
4112baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            }
4122baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            ip++;
4132baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        }
4142baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        if (inSet) {
4152baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            throw new IllegalArgumentException("Set was not terminated!");
4162baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        }
4172baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        return Arrays.copyOf(sParsedPatternScratch, it);
4182baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    }
4192baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4202baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static boolean isParsedModifier(int parsedChar) {
4212baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
4222baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
4232baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                parsedChar == PARSED_MODIFIER_RANGE_STOP ||
4242baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                parsedChar == PARSED_MODIFIER_RANGE_START;
4252baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    }
4262baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4272baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
4282baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4292baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        // create indexes
4302baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        int ip = 0, im = 0;
4312baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4322baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        // one-time length check
4332baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        final int LP = parsedPattern.length, LM = match.length();
4342baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4352baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        // The current character being analyzed in the pattern
4362baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        int patternChar;
4372baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4382baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        int tokenType;
4392baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4402baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        int charSetStart = 0, charSetEnd = 0;
4412baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4422baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        while (ip < LP) { // we still have content in the pattern
4432baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4442baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            patternChar = parsedPattern[ip];
4452baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            // get the match type of the next verb
4462baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4472baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            switch (patternChar) {
4482baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case PARSED_TOKEN_CHAR_ANY:
4492baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    tokenType = TOKEN_TYPE_ANY;
4502baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    ip++;
4512baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
4522baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case PARSED_TOKEN_CHAR_SET_START:
4532baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                case PARSED_TOKEN_CHAR_SET_INVERSE_START:
4542baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
4552baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            ? TOKEN_TYPE_SET
4562baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                            : TOKEN_TYPE_INVERSE_SET;
4572baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    charSetStart = ip + 1; // start from the char after the set start
4582baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
4592baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    charSetEnd = ip - 1; // we're on the set stop, end is the previous
4602baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    ip++; // move the pointer to the next pattern entry
4612baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
4622baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                default:
4632baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    charSetStart = ip;
4642baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    tokenType = TOKEN_TYPE_LITERAL;
4652baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    ip++;
4662baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    break;
4672baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            }
4682baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4692baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            final int minRepetition;
4702baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            final int maxRepetition;
4712baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
4722baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            // look for a match length modifier
4732baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            if (ip >= LP) {
4742baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                minRepetition = maxRepetition = 1;
4752baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            } else {
4762baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                patternChar = parsedPattern[ip];
4772baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                switch (patternChar) {
4782baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    case PARSED_MODIFIER_ZERO_OR_MORE:
4792baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        minRepetition = 0;
4802baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        maxRepetition = Integer.MAX_VALUE;
4812baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        ip++;
4822baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        break;
4832baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    case PARSED_MODIFIER_ONE_OR_MORE:
4842baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        minRepetition = 1;
4852baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        maxRepetition = Integer.MAX_VALUE;
4862baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        ip++;
4872baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        break;
4882baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    case PARSED_MODIFIER_RANGE_START:
4892baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        minRepetition = parsedPattern[++ip];
4902baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        maxRepetition = parsedPattern[++ip];
4912baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
4922baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        break;
4932baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    default:
4942baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        minRepetition = maxRepetition = 1; // implied literal
4952baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        break;
4962baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                }
4972baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            }
4982baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            if (minRepetition > maxRepetition) {
4992baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                return false;
5002baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            }
5012baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
5022baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            // attempt to match as many characters as possible
5032baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
5042baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    parsedPattern, charSetStart, charSetEnd);
5052baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
5062baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            // if we found a conflict, return false immediately
5072baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            if (matched == NO_MATCH) {
5082baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                return false;
5092baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            }
5102baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
5112baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            // move the match pointer the number of characters matched
5122baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            im += matched;
5132baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        }
5142baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        return ip >= LP && im >= LM; // have parsed entire string and regex
5152baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    }
5162baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
5172baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static int matchChars(String match, int im, final int lm, int tokenType,
5182baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            int minRepetition, int maxRepetition, int[] parsedPattern,
5192baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            int tokenStart, int tokenEnd) {
5202baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        int matched = 0;
5212baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
5222baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        while(matched < maxRepetition
5232baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
5242baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    tokenEnd)) {
5252baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            matched++;
5262baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        }
5272baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
5282baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        return matched < minRepetition ? NO_MATCH : matched;
5292baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    }
5302baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann
5312baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    private static boolean matchChar(String match, int im, final int lm, int tokenType,
5322baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            int[] parsedPattern, int tokenStart, int tokenEnd) {
5332baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        if (im >= lm) { // we've overrun the string, no match
5342baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            return false;
5352baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        }
5362baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        switch (tokenType) {
5372baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            case TOKEN_TYPE_ANY:
5382baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                return true;
5392baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            case TOKEN_TYPE_SET:
5402baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                for (int i = tokenStart; i < tokenEnd; i += 2) {
5412baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    char matchChar = match.charAt(im);
5422baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
5432baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        return true;
5442baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
5452baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                }
5462baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                return false;
5472baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            case TOKEN_TYPE_INVERSE_SET:
5482baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                for (int i = tokenStart; i < tokenEnd; i += 2) {
5492baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    char matchChar = match.charAt(im);
5502baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
5512baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                        return false;
5522baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                    }
5532baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                }
5542baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                return true;
5552baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            case TOKEN_TYPE_LITERAL:
5562baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                return match.charAt(im) == parsedPattern[tokenStart];
5572baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann            default:
5582baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann                return false;
5592baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann        }
5602baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann    }
5612baf095ca9e3827f721271a915d1e04db0718bf9Patrick Baumann}