10510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes/* 20510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * Copyright (C) 2010 The Android Open Source Project 30510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * 40510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * Licensed under the Apache License, Version 2.0 (the "License"); 50510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * you may not use this file except in compliance with the License. 60510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * You may obtain a copy of the License at 70510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * 80510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * http://www.apache.org/licenses/LICENSE-2.0 90510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * 100510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * Unless required by applicable law or agreed to in writing, software 110510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * distributed under the License is distributed on an "AS IS" BASIS, 120510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 130510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * See the License for the specific language governing permissions and 140510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * limitations under the License. 150510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes */ 160510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 170510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughespackage java.util.regex; 180510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 190510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughesimport java.util.ArrayList; 200510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughesimport java.util.List; 210510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 22601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes/** 230510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * Used to make {@code String.split} fast (and to help {@code Pattern.split} too). 240510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * @hide 250510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes */ 260510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughespublic class Splitter { 270510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // The RI allows regular expressions beginning with ] or }, but that's probably a bug. 280510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes private static final String METACHARACTERS = "\\?*+[](){}^$.|"; 290510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 300510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes private Splitter() { 310510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 320510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 330510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes /** 340510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * Returns a result equivalent to {@code s.split(separator, limit)} if it's able 350510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * to compute it more cheaply than ICU, or null if the caller should fall back to 360510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes * using ICU. 370510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes */ 380510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes public static String[] fastSplit(String re, String input, int limit) { 390510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // Can we do it cheaply? 400510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes int len = re.length(); 410510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes if (len == 0) { 420510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return null; 430510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 440510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes char ch = re.charAt(0); 450510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes if (len == 1 && METACHARACTERS.indexOf(ch) == -1) { 460510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // We're looking for a single non-metacharacter. Easy. 470510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } else if (len == 2 && ch == '\\') { 480510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // We're looking for a quoted character. 490510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // Quoted metacharacters are effectively single non-metacharacters. 500510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes ch = re.charAt(1); 510510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes if (METACHARACTERS.indexOf(ch) == -1) { 520510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return null; 530510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 540510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } else { 550510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return null; 560510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 570510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 580510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // We can do this cheaply... 590510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 600510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // Unlike Perl, which considers the result of splitting the empty string to be the empty 610510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // array, Java returns an array containing the empty string. 620510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes if (input.isEmpty()) { 630510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return new String[] { "" }; 640510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 650510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 660510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // Collect text preceding each occurrence of the separator, while there's enough space. 670510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes ArrayList<String> list = new ArrayList<String>(); 680510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit; 690510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes int begin = 0; 700510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes int end; 710510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes while ((end = input.indexOf(ch, begin)) != -1 && list.size() + 1 < maxSize) { 720510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes list.add(input.substring(begin, end)); 730510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes begin = end + 1; 740510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 750510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return finishSplit(list, input, begin, maxSize, limit); 760510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 770510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 780510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes public static String[] split(Pattern pattern, String re, String input, int limit) { 790510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes String[] fastResult = fastSplit(re, input, limit); 800510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes if (fastResult != null) { 810510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return fastResult; 820510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 830510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 840510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // Unlike Perl, which considers the result of splitting the empty string to be the empty 850510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // array, Java returns an array containing the empty string. 860510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes if (input.isEmpty()) { 870510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return new String[] { "" }; 880510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 890510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 900510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // Collect text preceding each occurrence of the separator, while there's enough space. 910510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes ArrayList<String> list = new ArrayList<String>(); 920510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes int maxSize = limit <= 0 ? Integer.MAX_VALUE : limit; 930510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes Matcher matcher = new Matcher(pattern, input); 940510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes int begin = 0; 950510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes while (matcher.find() && list.size() + 1 < maxSize) { 960510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes list.add(input.substring(begin, matcher.start())); 970510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes begin = matcher.end(); 980510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 990510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return finishSplit(list, input, begin, maxSize, limit); 1000510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 1010510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes 1020510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes private static String[] finishSplit(List<String> list, String input, int begin, int maxSize, int limit) { 103601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes // Add trailing text. 104601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes if (begin < input.length()) { 105601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes list.add(input.substring(begin)); 106601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes } else if (limit != 0) { // No point adding the empty string if limit == 0, just to remove it below. 107601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes list.add(""); 1080510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 109601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes // Remove all trailing empty matches in the limit == 0 case. 1100510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes if (limit == 0) { 1110510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes int i = list.size() - 1; 112601555d9c55a78a3d4aefdf8f50645545b8ab484Elliott Hughes while (i >= 0 && list.get(i).isEmpty()) { 1130510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes list.remove(i); 1140510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes i--; 1150510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 1160510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 1170510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes // Convert to an array. 1180510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes return list.toArray(new String[list.size()]); 1190510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes } 1200510f0d8ce7c20b8f6022545a70e8b868805dc60Elliott Hughes} 121