152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia/* 2ca1e43d6e1fac07c7fc29c66c7da1fa9d7cf50f2Shaopeng Jia * Copyright (C) 2011 The Libphonenumber Authors 352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Licensed under the Apache License, Version 2.0 (the "License"); 552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * you may not use this file except in compliance with the License. 652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * You may obtain a copy of the License at 752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * http://www.apache.org/licenses/LICENSE-2.0 952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 1052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Unless required by applicable law or agreed to in writing, software 1152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * distributed under the License is distributed on an "AS IS" BASIS, 1252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * See the License for the specific language governing permissions and 1452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * limitations under the License. 1552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 1652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 17a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathpackage com.google.i18n.phonenumbers; 1852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 19a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.PhoneNumberUtil.Leniency; 20a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.PhoneNumberUtil.MatchType; 21a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat; 22a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.Phonemetadata.NumberFormat; 23a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata; 24a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource; 25a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.Phonenumber.PhoneNumber; 2652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 27d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jiaimport java.lang.Character.UnicodeBlock; 2852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiaimport java.util.Iterator; 2952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiaimport java.util.NoSuchElementException; 3052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiaimport java.util.regex.Matcher; 3152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiaimport java.util.regex.Pattern; 3252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 3352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia/** 3452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}. 3552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in 3652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * {@link PhoneNumberUtil}. 3752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 3852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt> are 3952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * not found. 4052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 4152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <p>This class is not thread-safe. 4252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 4352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiafinal class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> { 4452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 4552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * The phone number pattern used by {@link #find}, similar to 4652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences: 4752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <ul> 4852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>All captures are limited in order to place an upper bound to the text matched by the 4952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * pattern. 5052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <ul> 5152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>Leading punctuation / plus signs are limited. 5252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>Consecutive occurrences of punctuation are limited. 5352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>Number of digits is limited. 5452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * </ul> 5552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>No whitespace is allowed at the start or end. 5652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported. 5752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * </ul> 5852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 5952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private static final Pattern PATTERN; 6052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 6152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Matches strings that look like publication pages. Example: 6252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns. 6352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre> 6452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 6552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * The string "211-227 (2003)" is not a telephone number. 6652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 6752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}"); 6852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 69372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /** 70372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or 71372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * 08/31/95. 72372bff8dd464574d36737d47e495cad14346653cShaopeng Jia */ 73372bff8dd464574d36737d47e495cad14346653cShaopeng Jia private static final Pattern SLASH_SEPARATED_DATES = 74372bff8dd464574d36737d47e495cad14346653cShaopeng Jia Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}"); 75372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 76372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /** 77cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the 78cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX. 79cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia */ 80cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia private static final Pattern TIME_STAMPS = 817a81979b0076f18b31b47b7df2beac29735f3a37Cecilia Roes Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$"); 82cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d"); 83cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia 84cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia /** 85372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * Pattern to check that brackets match. Opening brackets should be closed within a phone number. 86372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * This also checks that there is something inside the brackets. Having no brackets at all is also 87372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * fine. 88372bff8dd464574d36737d47e495cad14346653cShaopeng Jia */ 89372bff8dd464574d36737d47e495cad14346653cShaopeng Jia private static final Pattern MATCHING_BRACKETS; 90372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 91372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /** 92b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are 93b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia * ordered according to specificity. For example, white-space is last since that is frequently 94b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia * used in numbers, not just to separate two numbers. We have separate patterns since we don't 95b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia * want to break up the phone-number-like text on more than one different kind of symbol at one 96b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia * time, although symbols of the same type (e.g. space) can be safely grouped together. 97b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia * 98b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia * Note that if there is a match, we will always check any text found up to the first match as 99b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia * well. 100372bff8dd464574d36737d47e495cad14346653cShaopeng Jia */ 101b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia private static final Pattern[] INNER_MATCHES = { 102b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // Breaks on the slash - e.g. "651-234-2345/332-445-1234" 103b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia Pattern.compile("/+(.*)"), 104b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // Note that the bracket here is inside the capturing group, since we consider it part of the 105b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321". 106b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia Pattern.compile("(\\([^(]*)"), 107b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number." 108b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // We require a space on either side of the hyphen for it to be considered a separator. 109b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia Pattern.compile("(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)"), 110b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's 111b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // possible that it's supposed to be used to break two numbers without spaces, and we haven't 112b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // seen many instances of it used within a number. 113b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia Pattern.compile("[\u2012-\u2015\uFF0D]\\p{Z}*(.+)"), 114b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number." 115b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia Pattern.compile("\\.+\\p{Z}*([^.]+)"), 116b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // Breaks on space - e.g. "3324451234 8002341234" 117b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia Pattern.compile("\\p{Z}+(\\P{Z}+)") 118b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia }; 119372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 120d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia /** 121d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia * Punctuation that may be at the start of a phone number - brackets and plus signs. 122d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia */ 123d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia private static final Pattern LEAD_CLASS; 124d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia 12552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia static { 126372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist 127372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * to make the pattern more easily understood. */ 128372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 129372bff8dd464574d36737d47e495cad14346653cShaopeng Jia String openingParens = "(\\[\uFF08\uFF3B"; 130372bff8dd464574d36737d47e495cad14346653cShaopeng Jia String closingParens = ")\\]\uFF09\uFF3D"; 131372bff8dd464574d36737d47e495cad14346653cShaopeng Jia String nonParens = "[^" + openingParens + closingParens + "]"; 132372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 133372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /* Limit on the number of pairs of brackets in a phone number. */ 134372bff8dd464574d36737d47e495cad14346653cShaopeng Jia String bracketPairLimit = limit(0, 3); 135372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /* 136372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * An opening bracket at the beginning may not be closed, but subsequent ones should be. It's 137372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a 138372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * closing bracket first. We limit the sets of brackets in a phone number to four. 139372bff8dd464574d36737d47e495cad14346653cShaopeng Jia */ 140372bff8dd464574d36737d47e495cad14346653cShaopeng Jia MATCHING_BRACKETS = Pattern.compile( 141372bff8dd464574d36737d47e495cad14346653cShaopeng Jia "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" + 142372bff8dd464574d36737d47e495cad14346653cShaopeng Jia nonParens + "+" + 143372bff8dd464574d36737d47e495cad14346653cShaopeng Jia "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit + 144372bff8dd464574d36737d47e495cad14346653cShaopeng Jia nonParens + "*"); 14552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 14652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* Limit on the number of leading (plus) characters. */ 14752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String leadLimit = limit(0, 2); 14852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* Limit on the number of consecutive punctuation characters. */ 14952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String punctuationLimit = limit(0, 4); 15052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a 15152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * single block, set high enough to accommodate the entire national number and the international 15252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * country code. */ 15352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia int digitBlockLimit = 15452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE; 155d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some 15652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * formats use spaces to separate each digit. */ 15752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String blockLimit = limit(0, digitBlockLimit); 15852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 15952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* A punctuation sequence allowing white space. */ 16052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit; 16152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* A digits block without punctuation. */ 16252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit); 163d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia 164f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS; 165f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia String leadClass = "[" + leadClassChars + "]"; 166d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia LEAD_CLASS = Pattern.compile(leadClass); 16752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 16852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* Phone number pattern allowing optional punctuation. */ 16952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PATTERN = Pattern.compile( 17052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia "(?:" + leadClass + punctuation + ")" + leadLimit + 17152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit + 172f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?", 17352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberUtil.REGEX_FLAGS); 17452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 17552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 17652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** Returns a regular expression quantifier with an upper and lower limit. */ 17752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private static String limit(int lower, int upper) { 17852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if ((lower < 0) || (upper <= 0) || (upper < lower)) { 17952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia throw new IllegalArgumentException(); 18052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 18152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return "{" + lower + "," + upper + "}"; 18252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 18352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 18452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The potential states of a PhoneNumberMatcher. */ 18552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private enum State { 18652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia NOT_READY, READY, DONE 18752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 18852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 18952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The phone number utility. */ 190d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia private final PhoneNumberUtil phoneUtil; 19152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The text searched for phone numbers. */ 19252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private final CharSequence text; 19352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 19452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * The region (country) to assume for phone numbers without an international prefix, possibly 19552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * null. 19652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 19752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private final String preferredRegion; 19852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The degree of validation requested. */ 19952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private final Leniency leniency; 20052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The maximum number of retries after matching an invalid number. */ 20152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private long maxTries; 20252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 20352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The iteration tristate. */ 20452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private State state = State.NOT_READY; 20552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The last successful match, null unless in {@link State#READY}. */ 20652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch lastMatch = null; 20752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The next index to start searching at. Undefined in {@link State#DONE}. */ 20852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private int searchIndex = 0; 20952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 21052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 21152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a 21252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * new instance. 21352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 21452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param util the phone number util to use 21552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param text the character sequence that we will search, null for no text 216f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * @param country the country to assume for phone numbers not written in international format 217f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * (with a leading plus, or with the international dialing prefix of the 218f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * specified region). May be null or "ZZ" if only numbers with a 219f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * leading plus should be considered. 22052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param leniency the leniency to use when evaluating candidate phone numbers 22152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param maxTries the maximum number of invalid numbers to try before giving up on the text. 22252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * This is to cover degenerate cases where the text has a lot of false positives 22352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * in it. Must be {@code >= 0}. 22452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 22552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency, 22652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia long maxTries) { 22752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 22852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if ((util == null) || (leniency == null)) { 22952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia throw new NullPointerException(); 23052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 23152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if (maxTries < 0) { 23252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia throw new IllegalArgumentException(); 23352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 234d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia this.phoneUtil = util; 23552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia this.text = (text != null) ? text : ""; 23652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia this.preferredRegion = country; 23752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia this.leniency = leniency; 23852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia this.maxTries = maxTries; 23952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 24052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 24152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 24252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex} 24352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * that represents a phone number. Returns the next match, null if none was found. 24452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 24552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param index the search index to start searching at 24652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @return the phone number match found, null if none can be found 24752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 24852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch find(int index) { 24952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia Matcher matcher = PATTERN.matcher(text); 25052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia while ((maxTries > 0) && matcher.find(index)) { 25152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia int start = matcher.start(); 25252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia CharSequence candidate = text.subSequence(start, matcher.end()); 25352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 25452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // Check for extra numbers at the end. 25552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // TODO: This is the place to start when trying to support extraction of multiple phone number 25652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // from split notations (+41 79 123 45 67 / 68). 25752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate); 25852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 25952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberMatch match = extractMatch(candidate, start); 26052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if (match != null) { 26152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return match; 26252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 26352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 26452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia index = start + candidate.length(); 26552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia maxTries--; 26652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 26752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 26852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return null; 26952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 27052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 27152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 27252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Trims away any characters after the first match of {@code pattern} in {@code candidate}, 27352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * returning the trimmed version. 27452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 27552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) { 27652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia Matcher trailingCharsMatcher = pattern.matcher(candidate); 27752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if (trailingCharsMatcher.find()) { 27852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia candidate = candidate.subSequence(0, trailingCharsMatcher.start()); 27952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 28052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return candidate; 28152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 28252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 28352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 284d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia * Helper method to determine if a character is a Latin-script letter or not. For our purposes, 285d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia * combining marks should also return true since we assume they have been added to a preceding 286d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia * Latin character. 287d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia */ 288a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia // @VisibleForTesting 289d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia static boolean isLatinLetter(char letter) { 290d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia // Combining marks are a subset of non-spacing-mark. 291d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) { 292d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia return false; 293d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia } 294d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia UnicodeBlock block = UnicodeBlock.of(letter); 295d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia return block.equals(UnicodeBlock.BASIC_LATIN) || 296d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) || 297d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.LATIN_EXTENDED_A) || 298d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) || 299d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.LATIN_EXTENDED_B) || 300d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS); 301d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia } 302d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia 30396a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia private static boolean isInvalidPunctuationSymbol(char character) { 30496a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL; 305f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 306f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia 307d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia /** 30852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Attempts to extract a match from a {@code candidate} character sequence. 30952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 31052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param candidate the candidate text that might contain a phone number 31152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param offset the offset of {@code candidate} within {@link #text} 31252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @return the match found, null if none can be found 31352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 31452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) { 315b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // Skip a match that is more likely to be a date. 316b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia if (SLASH_SEPARATED_DATES.matcher(candidate).find()) { 31752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return null; 31852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 319b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia 320cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia // Skip potential time-stamps. 321cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia if (TIME_STAMPS.matcher(candidate).find()) { 322cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia String followingText = text.toString().substring(offset + candidate.length()); 323cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) { 324cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia return null; 325cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia } 326cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia } 32752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 32852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // Try to come up with a valid match given the entire candidate. 32952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String rawString = candidate.toString(); 33052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberMatch match = parseAndVerify(rawString, offset); 33152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if (match != null) { 33252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return match; 33352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 33452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 335372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // If that failed, try to find an "inner match" - there might be a phone number within this 336372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // candidate. 33752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return extractInnerMatch(rawString, offset); 33852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 33952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 34052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 341372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a 342372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * match. 34352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 34452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param candidate the candidate text that might contain a phone number 345372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * @param offset the current offset of {@code candidate} within {@link #text} 34652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @return the match found, null if none can be found 34752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 34852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch extractInnerMatch(String candidate, int offset) { 349b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia for (Pattern possibleInnerMatch : INNER_MATCHES) { 350b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia Matcher groupMatcher = possibleInnerMatch.matcher(candidate); 351b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia boolean isFirstMatch = true; 352b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia while (groupMatcher.find() && maxTries > 0) { 353b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia if (isFirstMatch) { 354b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia // We should handle any group before this one too. 355b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia CharSequence group = trimAfterFirstMatch( 356b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, 357b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia candidate.substring(0, groupMatcher.start())); 358b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia PhoneNumberMatch match = parseAndVerify(group.toString(), offset); 359b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia if (match != null) { 360b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia return match; 361b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia } 362b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia maxTries--; 363b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia isFirstMatch = false; 364f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 365b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia CharSequence group = trimAfterFirstMatch( 366b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, groupMatcher.group(1)); 367b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia PhoneNumberMatch match = parseAndVerify(group.toString(), offset + groupMatcher.start(1)); 368372bff8dd464574d36737d47e495cad14346653cShaopeng Jia if (match != null) { 369372bff8dd464574d36737d47e495cad14346653cShaopeng Jia return match; 370372bff8dd464574d36737d47e495cad14346653cShaopeng Jia } 371372bff8dd464574d36737d47e495cad14346653cShaopeng Jia maxTries--; 372372bff8dd464574d36737d47e495cad14346653cShaopeng Jia } 37352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 37452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return null; 37552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 37652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 37752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 37852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and 37952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a 38052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null. 38152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 38252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param candidate the candidate match 38352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param offset the offset of {@code candidate} within {@link #text} 38452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @return the parsed and validated phone number match, or null 38552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 38652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch parseAndVerify(String candidate, int offset) { 38752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia try { 388372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // Check the candidate doesn't contain any formatting which would indicate that it really 389372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // isn't a phone number. 390b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia if (!MATCHING_BRACKETS.matcher(candidate).matches() || PUB_PAGES.matcher(candidate).find()) { 391372bff8dd464574d36737d47e495cad14346653cShaopeng Jia return null; 392372bff8dd464574d36737d47e495cad14346653cShaopeng Jia } 393f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia 394f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded 395f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def. 396f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (leniency.compareTo(Leniency.VALID) >= 0) { 397f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // If the candidate is not at the start of the text, and does not start with phone-number 398f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // punctuation, check the previous character. 399f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) { 400f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia char previousChar = text.charAt(offset - 1); 40196a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia // We return null if it is a latin letter or an invalid punctuation symbol. 40296a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) { 403f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia return null; 404f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 405f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 406f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia int lastCharIndex = offset + candidate.length(); 407f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (lastCharIndex < text.length()) { 408f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia char nextChar = text.charAt(lastCharIndex); 40996a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) { 410f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia return null; 411f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 412f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 413f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 414f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia 415a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion); 416b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia 417f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // Check Israel * numbers: these are a special case in that they are four-digit numbers that 418f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // our library supports, but they can only be dialled with a leading *. Since we don't 419f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // actually store or detect the * in our phone number library, this means in practice we 420f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // detect most four digit numbers as being valid for Israel. We are considering moving these 421f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // numbers to ShortNumberInfo instead, in which case this problem would go away, but in the 422f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // meantime we want to restrict the false matches so we only allow these numbers if they are 423f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // preceded by a star. We enforce this for all leniency levels even though these numbers are 424f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // technically accepted by isPossibleNumber and isValidNumber since we consider it to be a 425f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // deficiency in those methods that they accept these numbers without the *. 426f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // TODO: Remove this or make it significantly less hacky once we've decided how to 427f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // handle these short codes going forward in ShortNumberInfo. We could use the formatting 428f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // rules for instance, but that would be slower. 429f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia if (phoneUtil.getRegionCodeForCountryCode(number.getCountryCode()).equals("IL") && 430f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia phoneUtil.getNationalSignificantNumber(number).length() == 4 && 431f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia (offset == 0 || (offset > 0 && text.charAt(offset - 1) != '*'))) { 432f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // No match. 433f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia return null; 434f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia } 435f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia 436f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (leniency.verify(number, candidate, phoneUtil)) { 437a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia // We used parseAndKeepRawInput to create this number, but for now we don't return the extra 438a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia // values parsed. TODO: stop clearing all values here and switch all users over 439a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia // to using rawInput() rather than the rawString() of PhoneNumberMatch. 440a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia number.clearCountryCodeSource(); 441a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia number.clearRawInput(); 442a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia number.clearPreferredDomesticCarrierCode(); 44352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return new PhoneNumberMatch(offset, candidate, number); 44452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 44552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } catch (NumberParseException e) { 44652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // ignore and continue 44752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 44852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return null; 44952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 45052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 45152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 452b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * Small helper interface such that the number groups can be checked according to different 4534b867acb917b73e699a596df94445c634c916519Shaopeng Jia * criteria, both for our default way of performing formatting and for any alternate formats we 4544b867acb917b73e699a596df94445c634c916519Shaopeng Jia * may want to check. 455b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia */ 456b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia interface NumberGroupingChecker { 457b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia /** 458b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * Returns true if the groups of digits found in our candidate phone number match our 459b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * expectations. 460b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * 461b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * @param number the original number we found when parsing 462b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * @param normalizedCandidate the candidate number, normalized to only contain ASCII digits, 463b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * but with non-digits (spaces etc) retained 464b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * @param expectedNumberGroups the groups of digits that we would expect to see if we 465b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * formatted this number 466b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia */ 467b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia boolean checkGroups(PhoneNumberUtil util, PhoneNumber number, 468b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder normalizedCandidate, String[] expectedNumberGroups); 469b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 470b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 471b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util, 472b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumber number, 473b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder normalizedCandidate, 474b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String[] formattedNumberGroups) { 475b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia int fromIndex = 0; 476f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) { 477f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia // First skip the country code if the normalized candidate contained it. 478f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia String countryCode = Integer.toString(number.getCountryCode()); 479f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia fromIndex = normalizedCandidate.indexOf(countryCode) + countryCode.length(); 480f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia } 481b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Check each group of consecutive digits are not broken into separate groupings in the 482b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // {@code normalizedCandidate} string. 483b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia for (int i = 0; i < formattedNumberGroups.length; i++) { 484b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex} 485b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // doesn't contain the consecutive digits in formattedNumberGroups[i]. 486b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex); 487b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (fromIndex < 0) { 488b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return false; 489b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 490b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Moves {@code fromIndex} forward. 491b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia fromIndex += formattedNumberGroups[i].length(); 492b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (i == 0 && fromIndex < normalizedCandidate.length()) { 493bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // We are at the position right after the NDC. We get the region used for formatting 494bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // information based on the country code in the phone number, rather than the number itself, 495bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // as we do not need to distinguish between different countries with the same country 496bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // calling code and this is faster. 497bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia String region = util.getRegionCodeForCountryCode(number.getCountryCode()); 498bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia if (util.getNddPrefixForRegion(region, true) != null && 499bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia Character.isDigit(normalizedCandidate.charAt(fromIndex))) { 500b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // This means there is no formatting symbol after the NDC. In this case, we only 501b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // accept the number if there is no formatting symbol at all in the number, except 502bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // for extensions. This is only important for countries with national prefixes. 503b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String nationalSignificantNumber = util.getNationalSignificantNumber(number); 504b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length()) 505b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia .startsWith(nationalSignificantNumber); 506b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 507b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 508b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 509b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // The check here makes sure that we haven't mistakenly already used the extension to 510b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // match the last group of the subscriber number. Note the extension cannot have 511b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // formatting in-between digits. 512b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return normalizedCandidate.substring(fromIndex).contains(number.getExtension()); 513b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 514b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 515b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, 516b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumber number, 517b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder normalizedCandidate, 518b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String[] formattedNumberGroups) { 519b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String[] candidateGroups = 520b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString()); 521b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Set this to the last group, skipping it if the number has an extension. 522b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia int candidateNumberGroupIndex = 523b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1; 524b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // First we check if the national significant number is formatted as a block. 525b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // We use contains and not equals, since the national significant number may be present with 526b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // a prefix such as a national number prefix, or the country code itself. 527b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (candidateGroups.length == 1 || 528b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia candidateGroups[candidateNumberGroupIndex].contains( 529b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia util.getNationalSignificantNumber(number))) { 530b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 531b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 532b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Starting from the end, go through in reverse, excluding the first group, and check the 533b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // candidate and number groups are the same. 534b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1); 535b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0; 536b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia formattedNumberGroupIndex--, candidateNumberGroupIndex--) { 537b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (!candidateGroups[candidateNumberGroupIndex].equals( 538b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia formattedNumberGroups[formattedNumberGroupIndex])) { 539b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return false; 540b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 541b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 542b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Now check the first group. There may be a national prefix at the start, so we only check 543b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // that the candidate group ends with the formatted number group. 544b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return (candidateNumberGroupIndex >= 0 && 545b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0])); 546b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 547b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 548b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia /** 549b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * Helper method to get the national-number part of a number, formatted without any national 550b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * prefix, and return it as a set of digit blocks that would be formatted together. 551b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia */ 552b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, 553b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia NumberFormat formattingPattern) { 554b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (formattingPattern == null) { 555b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits. 556b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966); 557b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // We remove the extension part from the formatted string before splitting it into different 558b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // groups. 559b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia int endIndex = rfc3966Format.indexOf(';'); 560b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (endIndex < 0) { 561b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia endIndex = rfc3966Format.length(); 562b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 563b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // The country-code will have a '-' following it. 564b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia int startIndex = rfc3966Format.indexOf('-') + 1; 565b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return rfc3966Format.substring(startIndex, endIndex).split("-"); 566b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } else { 567b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // We format the NSN only, and split that according to the separator. 568b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String nationalSignificantNumber = util.getNationalSignificantNumber(number); 569b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return util.formatNsnUsingPattern(nationalSignificantNumber, 570b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia formattingPattern, PhoneNumberFormat.RFC3966).split("-"); 571b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 572b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 573b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 574b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean checkNumberGroupingIsValid( 575b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) { 576b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions) 577b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // and optimise if necessary. 578b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder normalizedCandidate = 579b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */); 580b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String[] formattedNumberGroups = getNationalNumberGroups(util, number, null); 581b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 582b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 583b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 5844b867acb917b73e699a596df94445c634c916519Shaopeng Jia // If this didn't pass, see if there are any alternate formats, and try them instead. 5854b867acb917b73e699a596df94445c634c916519Shaopeng Jia PhoneMetadata alternateFormats = 5864b867acb917b73e699a596df94445c634c916519Shaopeng Jia MetadataManager.getAlternateFormatsForCountry(number.getCountryCode()); 5874b867acb917b73e699a596df94445c634c916519Shaopeng Jia if (alternateFormats != null) { 5884b867acb917b73e699a596df94445c634c916519Shaopeng Jia for (NumberFormat alternateFormat : alternateFormats.numberFormats()) { 5894b867acb917b73e699a596df94445c634c916519Shaopeng Jia formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat); 5904b867acb917b73e699a596df94445c634c916519Shaopeng Jia if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 5914b867acb917b73e699a596df94445c634c916519Shaopeng Jia return true; 5924b867acb917b73e699a596df94445c634c916519Shaopeng Jia } 5934b867acb917b73e699a596df94445c634c916519Shaopeng Jia } 5944b867acb917b73e699a596df94445c634c916519Shaopeng Jia } 595b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return false; 596b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 597b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 598bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia static boolean containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate) { 599bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia int firstSlashInBodyIndex = candidate.indexOf('/'); 600bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia if (firstSlashInBodyIndex < 0) { 601bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // No slashes, this is okay. 602bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia return false; 603bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia } 604bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // Now look for a second one. 605bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia int secondSlashInBodyIndex = candidate.indexOf('/', firstSlashInBodyIndex + 1); 606bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia if (secondSlashInBodyIndex < 0) { 607bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // Only one slash, this is okay. 608bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia return false; 609bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia } 610bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia 611bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // If the first slash is after the country calling code, this is permitted. 612bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia boolean candidateHasCountryCode = 613bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia (number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN || 614bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN); 615bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia if (candidateHasCountryCode && 616bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex)) 617bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia .equals(Integer.toString(number.getCountryCode()))) { 618bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia // Any more slashes and this is illegal. 619bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia return candidate.substring(secondSlashInBodyIndex + 1).contains("/"); 620bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia } 621bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia return true; 622b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 623b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 624b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean containsOnlyValidXChars( 625b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumber number, String candidate, PhoneNumberUtil util) { 626b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the 627b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // national significant number or (2) an extension sign, in which case they always precede the 628b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // extension number. We assume a carrier code is more than 1 digit, so the first case has to 629b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x' 630b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // or 'X'. We ignore the character if it appears as the last character of the string. 631b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia for (int index = 0; index < candidate.length() - 1; index++) { 632b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia char charAtIndex = candidate.charAt(index); 633b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (charAtIndex == 'x' || charAtIndex == 'X') { 634b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia char charAtNextIndex = candidate.charAt(index + 1); 635b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (charAtNextIndex == 'x' || charAtNextIndex == 'X') { 636b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // This is the carrier code case, in which the 'X's always precede the national 637b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // significant number. 638b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia index++; 639b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) { 640b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return false; 641b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 642b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // This is the extension sign case, in which the 'x' or 'X' should always precede the 643b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // extension number. 644b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals( 645b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia number.getExtension())) { 646f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia return false; 647b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 648b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 649b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 650b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 651b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 652b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 653b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) { 654b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // First, check how we deduced the country code. If it was written in international format, then 655b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // the national prefix is not required. 656b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) { 657b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 658b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 659b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String phoneNumberRegion = 660b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia util.getRegionCodeForCountryCode(number.getCountryCode()); 661b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion); 662b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (metadata == null) { 663b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 664b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 665b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Check if a national prefix should be present when formatting this number. 666b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String nationalNumber = util.getNationalSignificantNumber(number); 667b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia NumberFormat formatRule = 668b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber); 669b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // To do this, we check that a national prefix formatting rule was present and that it wasn't 670b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // just the first-group symbol ($1) with punctuation. 671b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) { 672b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (formatRule.isNationalPrefixOptionalWhenFormatting()) { 673b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // The national-prefix is optional in these cases, so we don't need to check if it was 674b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // present. 675b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 676b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 677bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia if (PhoneNumberUtil.formattingRuleHasFirstGroupOnly( 678bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia formatRule.getNationalPrefixFormattingRule())) { 679b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // National Prefix not needed for this number. 680b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 681b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 682b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Normalize the remainder. 683b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput()); 684b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder rawInput = new StringBuilder(rawInputCopy); 685b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Check if we found a national prefix and/or carrier code at the start of the raw input, and 686b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // return the result. 687b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null); 688b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 689b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 690b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 691b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 692b0d1cb1662daab3451e5fa59b8f073e9992b628bAndy Staudacher @Override 693b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia public boolean hasNext() { 694b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (state == State.NOT_READY) { 695b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia lastMatch = find(searchIndex); 696b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (lastMatch == null) { 697b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia state = State.DONE; 698b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } else { 699b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia searchIndex = lastMatch.end(); 700b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia state = State.READY; 701b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 702b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 703b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return state == State.READY; 704b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 705b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 706b0d1cb1662daab3451e5fa59b8f073e9992b628bAndy Staudacher @Override 707b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia public PhoneNumberMatch next() { 708b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Check the state and find the next match as a side-effect if necessary. 709b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (!hasNext()) { 710b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia throw new NoSuchElementException(); 711b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 712b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 713b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Don't retain that memory any longer than necessary. 714b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumberMatch result = lastMatch; 715b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia lastMatch = null; 716b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia state = State.NOT_READY; 717b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return result; 718b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 719b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 720b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia /** 72152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Always throws {@link UnsupportedOperationException} as removal is not supported. 72252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 723b0d1cb1662daab3451e5fa59b8f073e9992b628bAndy Staudacher @Override 72452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia public void remove() { 72552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia throw new UnsupportedOperationException(); 72652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 72752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia} 728