1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.os;
18
19import android.util.Log;
20
21import java.util.Arrays;
22
23/**
24 * A simple pattern matcher, which is safe to use on untrusted data: it does
25 * not provide full reg-exp support, only simple globbing that can not be
26 * used maliciously.
27 */
28public class PatternMatcher implements Parcelable {
29    /**
30     * Pattern type: the given pattern must exactly match the string it is
31     * tested against.
32     */
33    public static final int PATTERN_LITERAL = 0;
34
35    /**
36     * Pattern type: the given pattern must match the
37     * beginning of the string it is tested against.
38     */
39    public static final int PATTERN_PREFIX = 1;
40
41    /**
42     * Pattern type: the given pattern is interpreted with a
43     * simple glob syntax for matching against the string it is tested against.
44     * In this syntax, you can use the '*' character to match against zero or
45     * more occurrences of the character immediately before.  If the
46     * character before it is '.' it will match any character.  The character
47     * '\' can be used as an escape.  This essentially provides only the '*'
48     * wildcard part of a normal regexp.
49     */
50    public static final int PATTERN_SIMPLE_GLOB = 2;
51
52    /**
53     * Pattern type: the given pattern is interpreted with a regular
54     * expression-like syntax for matching against the string it is tested
55     * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
56     * with full support for character ranges and the not ({@code ^}) modifier.
57     * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
58     * for one-or-more and full range ({@code {...}}) support. This is a simple
59     * evaluation implementation in which matching is done against the pattern in
60     * real time with no backtracking support.
61     */
62    public static final int PATTERN_ADVANCED_GLOB = 3;
63
64    // token types for advanced matching
65    private static final int TOKEN_TYPE_LITERAL = 0;
66    private static final int TOKEN_TYPE_ANY = 1;
67    private static final int TOKEN_TYPE_SET = 2;
68    private static final int TOKEN_TYPE_INVERSE_SET = 3;
69
70    // Return for no match
71    private static final int NO_MATCH = -1;
72
73    private static final String TAG = "PatternMatcher";
74
75    // Parsed placeholders for advanced patterns
76    private static final int PARSED_TOKEN_CHAR_SET_START = -1;
77    private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
78    private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
79    private static final int PARSED_TOKEN_CHAR_ANY = -4;
80    private static final int PARSED_MODIFIER_RANGE_START = -5;
81    private static final int PARSED_MODIFIER_RANGE_STOP = -6;
82    private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
83    private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
84
85    private final String mPattern;
86    private final int mType;
87    private final int[] mParsedPattern;
88
89
90    private static final int MAX_PATTERN_STORAGE = 2048;
91    // workspace to use for building a parsed advanced pattern;
92    private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
93
94    public PatternMatcher(String pattern, int type) {
95        mPattern = pattern;
96        mType = type;
97        if (mType == PATTERN_ADVANCED_GLOB) {
98            mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
99        } else {
100            mParsedPattern = null;
101        }
102    }
103
104    public final String getPath() {
105        return mPattern;
106    }
107
108    public final int getType() {
109        return mType;
110    }
111
112    public boolean match(String str) {
113        return matchPattern(str, mPattern, mParsedPattern, mType);
114    }
115
116    public String toString() {
117        String type = "? ";
118        switch (mType) {
119            case PATTERN_LITERAL:
120                type = "LITERAL: ";
121                break;
122            case PATTERN_PREFIX:
123                type = "PREFIX: ";
124                break;
125            case PATTERN_SIMPLE_GLOB:
126                type = "GLOB: ";
127                break;
128            case PATTERN_ADVANCED_GLOB:
129                type = "ADVANCED: ";
130                break;
131        }
132        return "PatternMatcher{" + type + mPattern + "}";
133    }
134
135    public int describeContents() {
136        return 0;
137    }
138
139    public void writeToParcel(Parcel dest, int flags) {
140        dest.writeString(mPattern);
141        dest.writeInt(mType);
142        dest.writeIntArray(mParsedPattern);
143    }
144
145    public PatternMatcher(Parcel src) {
146        mPattern = src.readString();
147        mType = src.readInt();
148        mParsedPattern = src.createIntArray();
149    }
150
151    public static final Parcelable.Creator<PatternMatcher> CREATOR
152            = new Parcelable.Creator<PatternMatcher>() {
153        public PatternMatcher createFromParcel(Parcel source) {
154            return new PatternMatcher(source);
155        }
156
157        public PatternMatcher[] newArray(int size) {
158            return new PatternMatcher[size];
159        }
160    };
161
162    static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
163        if (match == null) return false;
164        if (type == PATTERN_LITERAL) {
165            return pattern.equals(match);
166        } if (type == PATTERN_PREFIX) {
167            return match.startsWith(pattern);
168        } else if (type == PATTERN_SIMPLE_GLOB) {
169            return matchGlobPattern(pattern, match);
170        } else if (type == PATTERN_ADVANCED_GLOB) {
171            return matchAdvancedPattern(parsedPattern, match);
172        }
173        return false;
174    }
175
176    static boolean matchGlobPattern(String pattern, String match) {
177        final int NP = pattern.length();
178        if (NP <= 0) {
179            return match.length() <= 0;
180        }
181        final int NM = match.length();
182        int ip = 0, im = 0;
183        char nextChar = pattern.charAt(0);
184        while ((ip<NP) && (im<NM)) {
185            char c = nextChar;
186            ip++;
187            nextChar = ip < NP ? pattern.charAt(ip) : 0;
188            final boolean escaped = (c == '\\');
189            if (escaped) {
190                c = nextChar;
191                ip++;
192                nextChar = ip < NP ? pattern.charAt(ip) : 0;
193            }
194            if (nextChar == '*') {
195                if (!escaped && c == '.') {
196                    if (ip >= (NP-1)) {
197                        // at the end with a pattern match, so
198                        // all is good without checking!
199                        return true;
200                    }
201                    ip++;
202                    nextChar = pattern.charAt(ip);
203                    // Consume everything until the next character in the
204                    // pattern is found.
205                    if (nextChar == '\\') {
206                        ip++;
207                        nextChar = ip < NP ? pattern.charAt(ip) : 0;
208                    }
209                    do {
210                        if (match.charAt(im) == nextChar) {
211                            break;
212                        }
213                        im++;
214                    } while (im < NM);
215                    if (im == NM) {
216                        // Whoops, the next character in the pattern didn't
217                        // exist in the match.
218                        return false;
219                    }
220                    ip++;
221                    nextChar = ip < NP ? pattern.charAt(ip) : 0;
222                    im++;
223                } else {
224                    // Consume only characters matching the one before '*'.
225                    do {
226                        if (match.charAt(im) != c) {
227                            break;
228                        }
229                        im++;
230                    } while (im < NM);
231                    ip++;
232                    nextChar = ip < NP ? pattern.charAt(ip) : 0;
233                }
234            } else {
235                if (c != '.' && match.charAt(im) != c) return false;
236                im++;
237            }
238        }
239
240        if (ip >= NP && im >= NM) {
241            // Reached the end of both strings, all is good!
242            return true;
243        }
244
245        // One last check: we may have finished the match string, but still
246        // have a '.*' at the end of the pattern, which should still count
247        // as a match.
248        if (ip == NP-2 && pattern.charAt(ip) == '.'
249            && pattern.charAt(ip+1) == '*') {
250            return true;
251        }
252
253        return false;
254    }
255
256    /**
257     * Parses the advanced pattern and returns an integer array representation of it. The integer
258     * array treats each field as a character if positive and a unique token placeholder if
259     * negative. This method will throw on any pattern structure violations.
260     */
261    synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
262        int ip = 0;
263        final int LP = pattern.length();
264
265        int it = 0;
266
267        boolean inSet = false;
268        boolean inRange = false;
269        boolean inCharClass = false;
270
271        boolean addToParsedPattern;
272
273        while (ip < LP) {
274            if (it > MAX_PATTERN_STORAGE - 3) {
275                throw new IllegalArgumentException("Pattern is too large!");
276            }
277
278            char c = pattern.charAt(ip);
279            addToParsedPattern = false;
280
281            switch (c) {
282                case '[':
283                    if (inSet) {
284                        addToParsedPattern = true; // treat as literal or char class in set
285                    } else {
286                        if (pattern.charAt(ip + 1) == '^') {
287                            sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
288                            ip++; // skip over the '^'
289                        } else {
290                            sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
291                        }
292                        ip++; // move to the next pattern char
293                        inSet = true;
294                        continue;
295                    }
296                    break;
297                case ']':
298                    if (!inSet) {
299                        addToParsedPattern = true; // treat as literal outside of set
300                    } else {
301                        int parsedToken = sParsedPatternScratch[it - 1];
302                        if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
303                            parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
304                            throw new IllegalArgumentException(
305                                    "You must define characters in a set.");
306                        }
307                        sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
308                        inSet = false;
309                        inCharClass = false;
310                    }
311                    break;
312                case '{':
313                    if (!inSet) {
314                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
315                            throw new IllegalArgumentException("Modifier must follow a token.");
316                        }
317                        sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
318                        ip++;
319                        inRange = true;
320                    }
321                    break;
322                case '}':
323                    if (inRange) { // only terminate the range if we're currently in one
324                        sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
325                        inRange = false;
326                    }
327                    break;
328                case '*':
329                    if (!inSet) {
330                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
331                            throw new IllegalArgumentException("Modifier must follow a token.");
332                        }
333                        sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
334                    }
335                    break;
336                case '+':
337                    if (!inSet) {
338                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
339                            throw new IllegalArgumentException("Modifier must follow a token.");
340                        }
341                        sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
342                    }
343                    break;
344                case '.':
345                    if (!inSet) {
346                        sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
347                    }
348                    break;
349                case '\\': // escape
350                    if (ip + 1 >= LP) {
351                        throw new IllegalArgumentException("Escape found at end of pattern!");
352                    }
353                    c = pattern.charAt(++ip);
354                    addToParsedPattern = true;
355                    break;
356                default:
357                    addToParsedPattern = true;
358                    break;
359            }
360            if (inSet) {
361                if (inCharClass) {
362                    sParsedPatternScratch[it++] = c;
363                    inCharClass = false;
364                } else {
365                    // look forward for character class
366                    if (ip + 2 < LP
367                            && pattern.charAt(ip + 1) == '-'
368                            && pattern.charAt(ip + 2) != ']') {
369                        inCharClass = true;
370                        sParsedPatternScratch[it++] = c; // set first token as lower end of range
371                        ip++; // advance past dash
372                    } else { // literal
373                        sParsedPatternScratch[it++] = c; // set first token as literal
374                        sParsedPatternScratch[it++] = c; // set second set as literal
375                    }
376                }
377            } else if (inRange) {
378                int endOfSet = pattern.indexOf('}', ip);
379                if (endOfSet < 0) {
380                    throw new IllegalArgumentException("Range not ended with '}'");
381                }
382                String rangeString = pattern.substring(ip, endOfSet);
383                int commaIndex = rangeString.indexOf(',');
384                try {
385                    final int rangeMin;
386                    final int rangeMax;
387                    if (commaIndex < 0) {
388                        int parsedRange = Integer.parseInt(rangeString);
389                        rangeMin = rangeMax = parsedRange;
390                    } else {
391                        rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
392                        if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
393                            rangeMax = Integer.MAX_VALUE;
394                        } else {
395                            rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
396                        }
397                    }
398                    if (rangeMin > rangeMax) {
399                        throw new IllegalArgumentException(
400                            "Range quantifier minimum is greater than maximum");
401                    }
402                    sParsedPatternScratch[it++] = rangeMin;
403                    sParsedPatternScratch[it++] = rangeMax;
404                } catch (NumberFormatException e) {
405                    throw new IllegalArgumentException("Range number format incorrect", e);
406                }
407                ip = endOfSet;
408                continue; // don't increment ip
409            } else if (addToParsedPattern) {
410                sParsedPatternScratch[it++] = c;
411            }
412            ip++;
413        }
414        if (inSet) {
415            throw new IllegalArgumentException("Set was not terminated!");
416        }
417        return Arrays.copyOf(sParsedPatternScratch, it);
418    }
419
420    private static boolean isParsedModifier(int parsedChar) {
421        return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
422                parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
423                parsedChar == PARSED_MODIFIER_RANGE_STOP ||
424                parsedChar == PARSED_MODIFIER_RANGE_START;
425    }
426
427    static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
428
429        // create indexes
430        int ip = 0, im = 0;
431
432        // one-time length check
433        final int LP = parsedPattern.length, LM = match.length();
434
435        // The current character being analyzed in the pattern
436        int patternChar;
437
438        int tokenType;
439
440        int charSetStart = 0, charSetEnd = 0;
441
442        while (ip < LP) { // we still have content in the pattern
443
444            patternChar = parsedPattern[ip];
445            // get the match type of the next verb
446
447            switch (patternChar) {
448                case PARSED_TOKEN_CHAR_ANY:
449                    tokenType = TOKEN_TYPE_ANY;
450                    ip++;
451                    break;
452                case PARSED_TOKEN_CHAR_SET_START:
453                case PARSED_TOKEN_CHAR_SET_INVERSE_START:
454                    tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
455                            ? TOKEN_TYPE_SET
456                            : TOKEN_TYPE_INVERSE_SET;
457                    charSetStart = ip + 1; // start from the char after the set start
458                    while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
459                    charSetEnd = ip - 1; // we're on the set stop, end is the previous
460                    ip++; // move the pointer to the next pattern entry
461                    break;
462                default:
463                    charSetStart = ip;
464                    tokenType = TOKEN_TYPE_LITERAL;
465                    ip++;
466                    break;
467            }
468
469            final int minRepetition;
470            final int maxRepetition;
471
472            // look for a match length modifier
473            if (ip >= LP) {
474                minRepetition = maxRepetition = 1;
475            } else {
476                patternChar = parsedPattern[ip];
477                switch (patternChar) {
478                    case PARSED_MODIFIER_ZERO_OR_MORE:
479                        minRepetition = 0;
480                        maxRepetition = Integer.MAX_VALUE;
481                        ip++;
482                        break;
483                    case PARSED_MODIFIER_ONE_OR_MORE:
484                        minRepetition = 1;
485                        maxRepetition = Integer.MAX_VALUE;
486                        ip++;
487                        break;
488                    case PARSED_MODIFIER_RANGE_START:
489                        minRepetition = parsedPattern[++ip];
490                        maxRepetition = parsedPattern[++ip];
491                        ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
492                        break;
493                    default:
494                        minRepetition = maxRepetition = 1; // implied literal
495                        break;
496                }
497            }
498            if (minRepetition > maxRepetition) {
499                return false;
500            }
501
502            // attempt to match as many characters as possible
503            int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
504                    parsedPattern, charSetStart, charSetEnd);
505
506            // if we found a conflict, return false immediately
507            if (matched == NO_MATCH) {
508                return false;
509            }
510
511            // move the match pointer the number of characters matched
512            im += matched;
513        }
514        return ip >= LP && im >= LM; // have parsed entire string and regex
515    }
516
517    private static int matchChars(String match, int im, final int lm, int tokenType,
518            int minRepetition, int maxRepetition, int[] parsedPattern,
519            int tokenStart, int tokenEnd) {
520        int matched = 0;
521
522        while(matched < maxRepetition
523                && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
524                    tokenEnd)) {
525            matched++;
526        }
527
528        return matched < minRepetition ? NO_MATCH : matched;
529    }
530
531    private static boolean matchChar(String match, int im, final int lm, int tokenType,
532            int[] parsedPattern, int tokenStart, int tokenEnd) {
533        if (im >= lm) { // we've overrun the string, no match
534            return false;
535        }
536        switch (tokenType) {
537            case TOKEN_TYPE_ANY:
538                return true;
539            case TOKEN_TYPE_SET:
540                for (int i = tokenStart; i < tokenEnd; i += 2) {
541                    char matchChar = match.charAt(im);
542                    if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
543                        return true;
544                    }
545                }
546                return false;
547            case TOKEN_TYPE_INVERSE_SET:
548                for (int i = tokenStart; i < tokenEnd; i += 2) {
549                    char matchChar = match.charAt(im);
550                    if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
551                        return false;
552                    }
553                }
554                return true;
555            case TOKEN_TYPE_LITERAL:
556                return match.charAt(im) == parsedPattern[tokenStart];
557            default:
558                return false;
559        }
560    }
561}