1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package java.util.regex;
18
19import java.util.ArrayList;
20import java.util.List;
21import libcore.util.EmptyArray;
22
23/**
24 * Used to make {@code String.split} fast (and to help {@code Pattern.split} too).
25 * @hide
26 */
27public class Splitter {
28    // The RI allows regular expressions beginning with ] or }, but that's probably a bug.
29    private static final String METACHARACTERS = "\\?*+[](){}^$.|";
30
31    private Splitter() {
32    }
33
34    /**
35     * Returns a result equivalent to {@code s.split(separator, limit)} if it's able
36     * to compute it more cheaply than ICU, or null if the caller should fall back to
37     * using ICU.
38     */
39    public static String[] fastSplit(String re, String input, int limit) {
40        // Can we do it cheaply?
41        int len = re.length();
42        if (len == 0) {
43            return null;
44        }
45        char ch = re.charAt(0);
46        if (len == 1 && METACHARACTERS.indexOf(ch) == -1) {
47            // We're looking for a single non-metacharacter. Easy.
48        } else if (len == 2 && ch == '\\') {
49            // We're looking for a quoted character.
50            // Quoted metacharacters are effectively single non-metacharacters.
51            ch = re.charAt(1);
52            if (METACHARACTERS.indexOf(ch) == -1) {
53                return null;
54            }
55        } else {
56            return null;
57        }
58
59        // We can do this cheaply...
60
61        // Unlike Perl, which considers the result of splitting the empty string to be the empty
62        // array, Java returns an array containing the empty string.
63        if (input.isEmpty()) {
64            return new String[] { "" };
65        }
66
67        // Count separators
68        int separatorCount = 0;
69        int begin = 0;
70        int end;
71        while (separatorCount + 1 != limit && (end = input.indexOf(ch, begin)) != -1) {
72            ++separatorCount;
73            begin = end + 1;
74        }
75        int lastPartEnd = input.length();
76        if (limit == 0 && begin == lastPartEnd) {
77            // Last part is empty for limit == 0, remove all trailing empty matches.
78            if (separatorCount == lastPartEnd) {
79                // Input contains only separators.
80                return EmptyArray.STRING;
81            }
82            // Find the beginning of trailing separators.
83            do {
84                --begin;
85            } while (input.charAt(begin - 1) == ch);
86            // Reduce separatorCount and fix lastPartEnd.
87            separatorCount -= input.length() - begin;
88            lastPartEnd = begin;
89        }
90
91        // Collect the result parts.
92        String[] result = new String[separatorCount + 1];
93        begin = 0;
94        for (int i = 0; i != separatorCount; ++i) {
95            end = input.indexOf(ch, begin);
96            result[i] = input.substring(begin, end);
97            begin = end + 1;
98        }
99        // Add last part.
100        result[separatorCount] = input.substring(begin, lastPartEnd);
101        return result;
102    }
103
104    public static String[] split(Pattern pattern, String re, String input, int limit) {
105        String[] fastResult = fastSplit(re, input, limit);
106        if (fastResult != null) {
107            return fastResult;
108        }
109
110        // Unlike Perl, which considers the result of splitting the empty string to be the empty
111        // array, Java returns an array containing the empty string.
112        if (input.isEmpty()) {
113            return new String[] { "" };
114        }
115
116        // Collect text preceding each occurrence of the separator, while there's enough space.
117        ArrayList<String> list = new ArrayList<String>();
118        Matcher matcher = new Matcher(pattern, input);
119        int begin = 0;
120        while (list.size() + 1 != limit && matcher.find()) {
121            list.add(input.substring(begin, matcher.start()));
122            begin = matcher.end();
123        }
124        return finishSplit(list, input, begin, limit);
125    }
126
127    private static String[] finishSplit(List<String> list, String input, int begin, int limit) {
128        // Add trailing text.
129        if (begin < input.length()) {
130            list.add(input.substring(begin));
131        } else if (limit != 0) {
132            list.add("");
133        } else {
134            // Remove all trailing empty matches in the limit == 0 case.
135            int i = list.size() - 1;
136            while (i >= 0 && list.get(i).isEmpty()) {
137                list.remove(i);
138                i--;
139            }
140        }
141        // Convert to an array.
142        return list.toArray(new String[list.size()]);
143    }
144}
145