1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.os;
18
19import android.util.proto.ProtoOutputStream;
20
21import java.util.Arrays;
22
23/**
24 * A simple pattern matcher, which is safe to use on untrusted data: it does
25 * not provide full reg-exp support, only simple globbing that can not be
26 * used maliciously.
27 */
28public class PatternMatcher implements Parcelable {
29    /**
30     * Pattern type: the given pattern must exactly match the string it is
31     * tested against.
32     */
33    public static final int PATTERN_LITERAL = 0;
34
35    /**
36     * Pattern type: the given pattern must match the
37     * beginning of the string it is tested against.
38     */
39    public static final int PATTERN_PREFIX = 1;
40
41    /**
42     * Pattern type: the given pattern is interpreted with a
43     * simple glob syntax for matching against the string it is tested against.
44     * In this syntax, you can use the '*' character to match against zero or
45     * more occurrences of the character immediately before.  If the
46     * character before it is '.' it will match any character.  The character
47     * '\' can be used as an escape.  This essentially provides only the '*'
48     * wildcard part of a normal regexp.
49     */
50    public static final int PATTERN_SIMPLE_GLOB = 2;
51
52    /**
53     * Pattern type: the given pattern is interpreted with a regular
54     * expression-like syntax for matching against the string it is tested
55     * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
56     * with full support for character ranges and the not ({@code ^}) modifier.
57     * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
58     * for one-or-more and full range ({@code {...}}) support. This is a simple
59     * evaluation implementation in which matching is done against the pattern in
60     * real time with no backtracking support.
61     */
62    public static final int PATTERN_ADVANCED_GLOB = 3;
63
64    // token types for advanced matching
65    private static final int TOKEN_TYPE_LITERAL = 0;
66    private static final int TOKEN_TYPE_ANY = 1;
67    private static final int TOKEN_TYPE_SET = 2;
68    private static final int TOKEN_TYPE_INVERSE_SET = 3;
69
70    // Return for no match
71    private static final int NO_MATCH = -1;
72
73    private static final String TAG = "PatternMatcher";
74
75    // Parsed placeholders for advanced patterns
76    private static final int PARSED_TOKEN_CHAR_SET_START = -1;
77    private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
78    private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
79    private static final int PARSED_TOKEN_CHAR_ANY = -4;
80    private static final int PARSED_MODIFIER_RANGE_START = -5;
81    private static final int PARSED_MODIFIER_RANGE_STOP = -6;
82    private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
83    private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
84
85    private final String mPattern;
86    private final int mType;
87    private final int[] mParsedPattern;
88
89
90    private static final int MAX_PATTERN_STORAGE = 2048;
91    // workspace to use for building a parsed advanced pattern;
92    private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
93
94    public PatternMatcher(String pattern, int type) {
95        mPattern = pattern;
96        mType = type;
97        if (mType == PATTERN_ADVANCED_GLOB) {
98            mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
99        } else {
100            mParsedPattern = null;
101        }
102    }
103
104    public final String getPath() {
105        return mPattern;
106    }
107
108    public final int getType() {
109        return mType;
110    }
111
112    public boolean match(String str) {
113        return matchPattern(str, mPattern, mParsedPattern, mType);
114    }
115
116    public String toString() {
117        String type = "? ";
118        switch (mType) {
119            case PATTERN_LITERAL:
120                type = "LITERAL: ";
121                break;
122            case PATTERN_PREFIX:
123                type = "PREFIX: ";
124                break;
125            case PATTERN_SIMPLE_GLOB:
126                type = "GLOB: ";
127                break;
128            case PATTERN_ADVANCED_GLOB:
129                type = "ADVANCED: ";
130                break;
131        }
132        return "PatternMatcher{" + type + mPattern + "}";
133    }
134
135    /** @hide */
136    public void writeToProto(ProtoOutputStream proto, long fieldId) {
137        long token = proto.start(fieldId);
138        proto.write(PatternMatcherProto.PATTERN, mPattern);
139        proto.write(PatternMatcherProto.TYPE, mType);
140        // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to
141        // match the current data structure.
142        proto.end(token);
143    }
144
145    public int describeContents() {
146        return 0;
147    }
148
149    public void writeToParcel(Parcel dest, int flags) {
150        dest.writeString(mPattern);
151        dest.writeInt(mType);
152        dest.writeIntArray(mParsedPattern);
153    }
154
155    public PatternMatcher(Parcel src) {
156        mPattern = src.readString();
157        mType = src.readInt();
158        mParsedPattern = src.createIntArray();
159    }
160
161    public static final Parcelable.Creator<PatternMatcher> CREATOR
162            = new Parcelable.Creator<PatternMatcher>() {
163        public PatternMatcher createFromParcel(Parcel source) {
164            return new PatternMatcher(source);
165        }
166
167        public PatternMatcher[] newArray(int size) {
168            return new PatternMatcher[size];
169        }
170    };
171
172    static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
173        if (match == null) return false;
174        if (type == PATTERN_LITERAL) {
175            return pattern.equals(match);
176        } if (type == PATTERN_PREFIX) {
177            return match.startsWith(pattern);
178        } else if (type == PATTERN_SIMPLE_GLOB) {
179            return matchGlobPattern(pattern, match);
180        } else if (type == PATTERN_ADVANCED_GLOB) {
181            return matchAdvancedPattern(parsedPattern, match);
182        }
183        return false;
184    }
185
186    static boolean matchGlobPattern(String pattern, String match) {
187        final int NP = pattern.length();
188        if (NP <= 0) {
189            return match.length() <= 0;
190        }
191        final int NM = match.length();
192        int ip = 0, im = 0;
193        char nextChar = pattern.charAt(0);
194        while ((ip<NP) && (im<NM)) {
195            char c = nextChar;
196            ip++;
197            nextChar = ip < NP ? pattern.charAt(ip) : 0;
198            final boolean escaped = (c == '\\');
199            if (escaped) {
200                c = nextChar;
201                ip++;
202                nextChar = ip < NP ? pattern.charAt(ip) : 0;
203            }
204            if (nextChar == '*') {
205                if (!escaped && c == '.') {
206                    if (ip >= (NP-1)) {
207                        // at the end with a pattern match, so
208                        // all is good without checking!
209                        return true;
210                    }
211                    ip++;
212                    nextChar = pattern.charAt(ip);
213                    // Consume everything until the next character in the
214                    // pattern is found.
215                    if (nextChar == '\\') {
216                        ip++;
217                        nextChar = ip < NP ? pattern.charAt(ip) : 0;
218                    }
219                    do {
220                        if (match.charAt(im) == nextChar) {
221                            break;
222                        }
223                        im++;
224                    } while (im < NM);
225                    if (im == NM) {
226                        // Whoops, the next character in the pattern didn't
227                        // exist in the match.
228                        return false;
229                    }
230                    ip++;
231                    nextChar = ip < NP ? pattern.charAt(ip) : 0;
232                    im++;
233                } else {
234                    // Consume only characters matching the one before '*'.
235                    do {
236                        if (match.charAt(im) != c) {
237                            break;
238                        }
239                        im++;
240                    } while (im < NM);
241                    ip++;
242                    nextChar = ip < NP ? pattern.charAt(ip) : 0;
243                }
244            } else {
245                if (c != '.' && match.charAt(im) != c) return false;
246                im++;
247            }
248        }
249
250        if (ip >= NP && im >= NM) {
251            // Reached the end of both strings, all is good!
252            return true;
253        }
254
255        // One last check: we may have finished the match string, but still
256        // have a '.*' at the end of the pattern, which should still count
257        // as a match.
258        if (ip == NP-2 && pattern.charAt(ip) == '.'
259            && pattern.charAt(ip+1) == '*') {
260            return true;
261        }
262
263        return false;
264    }
265
266    /**
267     * Parses the advanced pattern and returns an integer array representation of it. The integer
268     * array treats each field as a character if positive and a unique token placeholder if
269     * negative. This method will throw on any pattern structure violations.
270     */
271    synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
272        int ip = 0;
273        final int LP = pattern.length();
274
275        int it = 0;
276
277        boolean inSet = false;
278        boolean inRange = false;
279        boolean inCharClass = false;
280
281        boolean addToParsedPattern;
282
283        while (ip < LP) {
284            if (it > MAX_PATTERN_STORAGE - 3) {
285                throw new IllegalArgumentException("Pattern is too large!");
286            }
287
288            char c = pattern.charAt(ip);
289            addToParsedPattern = false;
290
291            switch (c) {
292                case '[':
293                    if (inSet) {
294                        addToParsedPattern = true; // treat as literal or char class in set
295                    } else {
296                        if (pattern.charAt(ip + 1) == '^') {
297                            sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
298                            ip++; // skip over the '^'
299                        } else {
300                            sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
301                        }
302                        ip++; // move to the next pattern char
303                        inSet = true;
304                        continue;
305                    }
306                    break;
307                case ']':
308                    if (!inSet) {
309                        addToParsedPattern = true; // treat as literal outside of set
310                    } else {
311                        int parsedToken = sParsedPatternScratch[it - 1];
312                        if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
313                            parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
314                            throw new IllegalArgumentException(
315                                    "You must define characters in a set.");
316                        }
317                        sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
318                        inSet = false;
319                        inCharClass = false;
320                    }
321                    break;
322                case '{':
323                    if (!inSet) {
324                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
325                            throw new IllegalArgumentException("Modifier must follow a token.");
326                        }
327                        sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
328                        ip++;
329                        inRange = true;
330                    }
331                    break;
332                case '}':
333                    if (inRange) { // only terminate the range if we're currently in one
334                        sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
335                        inRange = false;
336                    }
337                    break;
338                case '*':
339                    if (!inSet) {
340                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
341                            throw new IllegalArgumentException("Modifier must follow a token.");
342                        }
343                        sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
344                    }
345                    break;
346                case '+':
347                    if (!inSet) {
348                        if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
349                            throw new IllegalArgumentException("Modifier must follow a token.");
350                        }
351                        sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
352                    }
353                    break;
354                case '.':
355                    if (!inSet) {
356                        sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
357                    }
358                    break;
359                case '\\': // escape
360                    if (ip + 1 >= LP) {
361                        throw new IllegalArgumentException("Escape found at end of pattern!");
362                    }
363                    c = pattern.charAt(++ip);
364                    addToParsedPattern = true;
365                    break;
366                default:
367                    addToParsedPattern = true;
368                    break;
369            }
370            if (inSet) {
371                if (inCharClass) {
372                    sParsedPatternScratch[it++] = c;
373                    inCharClass = false;
374                } else {
375                    // look forward for character class
376                    if (ip + 2 < LP
377                            && pattern.charAt(ip + 1) == '-'
378                            && pattern.charAt(ip + 2) != ']') {
379                        inCharClass = true;
380                        sParsedPatternScratch[it++] = c; // set first token as lower end of range
381                        ip++; // advance past dash
382                    } else { // literal
383                        sParsedPatternScratch[it++] = c; // set first token as literal
384                        sParsedPatternScratch[it++] = c; // set second set as literal
385                    }
386                }
387            } else if (inRange) {
388                int endOfSet = pattern.indexOf('}', ip);
389                if (endOfSet < 0) {
390                    throw new IllegalArgumentException("Range not ended with '}'");
391                }
392                String rangeString = pattern.substring(ip, endOfSet);
393                int commaIndex = rangeString.indexOf(',');
394                try {
395                    final int rangeMin;
396                    final int rangeMax;
397                    if (commaIndex < 0) {
398                        int parsedRange = Integer.parseInt(rangeString);
399                        rangeMin = rangeMax = parsedRange;
400                    } else {
401                        rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
402                        if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
403                            rangeMax = Integer.MAX_VALUE;
404                        } else {
405                            rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
406                        }
407                    }
408                    if (rangeMin > rangeMax) {
409                        throw new IllegalArgumentException(
410                            "Range quantifier minimum is greater than maximum");
411                    }
412                    sParsedPatternScratch[it++] = rangeMin;
413                    sParsedPatternScratch[it++] = rangeMax;
414                } catch (NumberFormatException e) {
415                    throw new IllegalArgumentException("Range number format incorrect", e);
416                }
417                ip = endOfSet;
418                continue; // don't increment ip
419            } else if (addToParsedPattern) {
420                sParsedPatternScratch[it++] = c;
421            }
422            ip++;
423        }
424        if (inSet) {
425            throw new IllegalArgumentException("Set was not terminated!");
426        }
427        return Arrays.copyOf(sParsedPatternScratch, it);
428    }
429
430    private static boolean isParsedModifier(int parsedChar) {
431        return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
432                parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
433                parsedChar == PARSED_MODIFIER_RANGE_STOP ||
434                parsedChar == PARSED_MODIFIER_RANGE_START;
435    }
436
437    static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
438
439        // create indexes
440        int ip = 0, im = 0;
441
442        // one-time length check
443        final int LP = parsedPattern.length, LM = match.length();
444
445        // The current character being analyzed in the pattern
446        int patternChar;
447
448        int tokenType;
449
450        int charSetStart = 0, charSetEnd = 0;
451
452        while (ip < LP) { // we still have content in the pattern
453
454            patternChar = parsedPattern[ip];
455            // get the match type of the next verb
456
457            switch (patternChar) {
458                case PARSED_TOKEN_CHAR_ANY:
459                    tokenType = TOKEN_TYPE_ANY;
460                    ip++;
461                    break;
462                case PARSED_TOKEN_CHAR_SET_START:
463                case PARSED_TOKEN_CHAR_SET_INVERSE_START:
464                    tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
465                            ? TOKEN_TYPE_SET
466                            : TOKEN_TYPE_INVERSE_SET;
467                    charSetStart = ip + 1; // start from the char after the set start
468                    while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
469                    charSetEnd = ip - 1; // we're on the set stop, end is the previous
470                    ip++; // move the pointer to the next pattern entry
471                    break;
472                default:
473                    charSetStart = ip;
474                    tokenType = TOKEN_TYPE_LITERAL;
475                    ip++;
476                    break;
477            }
478
479            final int minRepetition;
480            final int maxRepetition;
481
482            // look for a match length modifier
483            if (ip >= LP) {
484                minRepetition = maxRepetition = 1;
485            } else {
486                patternChar = parsedPattern[ip];
487                switch (patternChar) {
488                    case PARSED_MODIFIER_ZERO_OR_MORE:
489                        minRepetition = 0;
490                        maxRepetition = Integer.MAX_VALUE;
491                        ip++;
492                        break;
493                    case PARSED_MODIFIER_ONE_OR_MORE:
494                        minRepetition = 1;
495                        maxRepetition = Integer.MAX_VALUE;
496                        ip++;
497                        break;
498                    case PARSED_MODIFIER_RANGE_START:
499                        minRepetition = parsedPattern[++ip];
500                        maxRepetition = parsedPattern[++ip];
501                        ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
502                        break;
503                    default:
504                        minRepetition = maxRepetition = 1; // implied literal
505                        break;
506                }
507            }
508            if (minRepetition > maxRepetition) {
509                return false;
510            }
511
512            // attempt to match as many characters as possible
513            int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
514                    parsedPattern, charSetStart, charSetEnd);
515
516            // if we found a conflict, return false immediately
517            if (matched == NO_MATCH) {
518                return false;
519            }
520
521            // move the match pointer the number of characters matched
522            im += matched;
523        }
524        return ip >= LP && im >= LM; // have parsed entire string and regex
525    }
526
527    private static int matchChars(String match, int im, final int lm, int tokenType,
528            int minRepetition, int maxRepetition, int[] parsedPattern,
529            int tokenStart, int tokenEnd) {
530        int matched = 0;
531
532        while(matched < maxRepetition
533                && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
534                    tokenEnd)) {
535            matched++;
536        }
537
538        return matched < minRepetition ? NO_MATCH : matched;
539    }
540
541    private static boolean matchChar(String match, int im, final int lm, int tokenType,
542            int[] parsedPattern, int tokenStart, int tokenEnd) {
543        if (im >= lm) { // we've overrun the string, no match
544            return false;
545        }
546        switch (tokenType) {
547            case TOKEN_TYPE_ANY:
548                return true;
549            case TOKEN_TYPE_SET:
550                for (int i = tokenStart; i < tokenEnd; i += 2) {
551                    char matchChar = match.charAt(im);
552                    if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
553                        return true;
554                    }
555                }
556                return false;
557            case TOKEN_TYPE_INVERSE_SET:
558                for (int i = tokenStart; i < tokenEnd; i += 2) {
559                    char matchChar = match.charAt(im);
560                    if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
561                        return false;
562                    }
563                }
564                return true;
565            case TOKEN_TYPE_LITERAL:
566                return match.charAt(im) == parsedPattern[tokenStart];
567            default:
568                return false;
569        }
570    }
571}