10510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes/*
20510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * Copyright (C) 2010 The Android Open Source Project
30510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes *
40510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * Licensed under the Apache License, Version 2.0 (the "License");
50510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * you may not use this file except in compliance with the License.
60510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * You may obtain a copy of the License at
70510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes *
80510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes *      http://www.apache.org/licenses/LICENSE-2.0
90510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes *
100510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * Unless required by applicable law or agreed to in writing, software
110510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * distributed under the License is distributed on an "AS IS" BASIS,
120510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
130510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * See the License for the specific language governing permissions and
140510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * limitations under the License.
150510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes */
160510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
170510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughespackage java.util.regex;
180510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
190510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughesimport java.util.ArrayList;
200510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughesimport java.util.List;
210510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
22601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes/**
230510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * Used to make {@code String.split} fast (and to help {@code Pattern.split} too).
240510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * @hide
250510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes */
260510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughespublic class Splitter {
270510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    // The RI allows regular expressions beginning with ] or }, but that's probably a bug.
280510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    private static final String METACHARACTERS = "\\?*+[](){}^$.|";
290510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
300510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    private Splitter() {
310510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    }
320510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
330510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    /**
340510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes     * Returns a result equivalent to {@code s.split(separator, limit)} if it's able
350510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes     * to compute it more cheaply than ICU, or null if the caller should fall back to
360510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes     * using ICU.
370510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes     */
380510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    public static String[] fastSplit(String re, String input, int limit) {
390510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        // Can we do it cheaply?
400510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        int len = re.length();
410510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        if (len == 0) {
420510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            return null;
430510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        }
440510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        char ch = re.charAt(0);
450510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        if (len == 1 && METACHARACTERS.indexOf(ch) == -1) {
460510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            // We're looking for a single non-metacharacter. Easy.
470510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        } else if (len == 2 && ch == '\\') {
480510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            // We're looking for a quoted character.
490510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            // Quoted metacharacters are effectively single non-metacharacters.
500510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            ch = re.charAt(1);
510510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            if (METACHARACTERS.indexOf(ch) == -1) {
520510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes                return null;
530510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            }
540510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        } else {
550510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            return null;
560510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        }
570510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
580510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        // We can do this cheaply...
590510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
600510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        // Unlike Perl, which considers the result of splitting the empty string to be the empty
610510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        // array, Java returns an array containing the empty string.
620510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        if (input.isEmpty()) {
630510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            return new String[] { "" };
640510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        }
650510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
660510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        // Collect text preceding each occurrence of the separator, while there's enough space.
670510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        ArrayList<String> list = new ArrayList<String>();
680510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit;
690510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        int begin = 0;
700510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        int end;
710510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        while ((end = input.indexOf(ch, begin)) != -1 && list.size() + 1 < maxSize) {
720510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            list.add(input.substring(begin, end));
730510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            begin = end + 1;
740510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        }
750510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        return finishSplit(list, input, begin, maxSize, limit);
760510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    }
770510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
780510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    public static String[] split(Pattern pattern, String re, String input, int limit) {
790510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        String[] fastResult = fastSplit(re, input, limit);
800510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        if (fastResult != null) {
810510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            return fastResult;
820510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        }
830510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
840510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        // Unlike Perl, which considers the result of splitting the empty string to be the empty
850510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        // array, Java returns an array containing the empty string.
860510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        if (input.isEmpty()) {
870510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            return new String[] { "" };
880510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        }
890510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
900510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        // Collect text preceding each occurrence of the separator, while there's enough space.
910510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        ArrayList<String> list = new ArrayList<String>();
920510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit;
930510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        Matcher matcher = new Matcher(pattern, input);
940510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        int begin = 0;
950510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        while (matcher.find() && list.size() + 1 < maxSize) {
960510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            list.add(input.substring(begin, matcher.start()));
970510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            begin = matcher.end();
980510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        }
990510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        return finishSplit(list, input, begin, maxSize, limit);
1000510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    }
1010510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes
1020510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    private static String[] finishSplit(List<String> list, String input, int begin, int maxSize, int limit) {
103601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes        // Add trailing text.
104601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes        if (begin < input.length()) {
105601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes            list.add(input.substring(begin));
106601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes        } else if (limit != 0) { // No point adding the empty string if limit == 0, just to remove it below.
107601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes            list.add("");
1080510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        }
109601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes        // Remove all trailing empty matches in the limit == 0 case.
1100510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        if (limit == 0) {
1110510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            int i = list.size() - 1;
112601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes            while (i >= 0 && list.get(i).isEmpty()) {
1130510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes                list.remove(i);
1140510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes                i--;
1150510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes            }
1160510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        }
1170510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        // Convert to an array.
1180510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes        return list.toArray(new String[list.size()]);
1190510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes    }
1200510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes}
121