/*
 * Copyright (C) 2011 The Libphonenumber Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.i18n.phonenumbers;

import com.android.i18n.phonenumbers.PhoneNumberUtil.Leniency;
import com.android.i18n.phonenumbers.PhoneNumberUtil.MatchType;
import com.android.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat;
import com.android.i18n.phonenumbers.Phonemetadata.NumberFormat;
import com.android.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource;
import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber;

import java.lang.Character.UnicodeBlock;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}.
3552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in 3652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * {@link PhoneNumberUtil}. 3752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 3852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt> are 3952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * not found. 4052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 4152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <p>This class is not thread-safe. 4252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 4352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @author Tom Hofmann 4452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 4552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiafinal class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> { 4652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 4752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * The phone number pattern used by {@link #find}, similar to 4852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences: 4952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <ul> 5052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>All captures are limited in order to place an upper bound to the text matched by the 5152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * pattern. 5252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <ul> 5352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>Leading punctuation / plus signs are limited. 5452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>Consecutive occurrences of punctuation are limited. 5552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>Number of digits is limited. 
5652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * </ul> 5752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>No whitespace is allowed at the start or end. 5852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported. 5952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * </ul> 6052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 6152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private static final Pattern PATTERN; 6252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 6352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Matches strings that look like publication pages. Example: 6452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns. 6552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre> 6652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 6752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * The string "211-227 (2003)" is not a telephone number. 6852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 6952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}"); 7052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 71372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /** 72372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or 73372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * 08/31/95. 
74372bff8dd464574d36737d47e495cad14346653cShaopeng Jia */ 75372bff8dd464574d36737d47e495cad14346653cShaopeng Jia private static final Pattern SLASH_SEPARATED_DATES = 76372bff8dd464574d36737d47e495cad14346653cShaopeng Jia Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}"); 77372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 78372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /** 79cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the 80cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX. 81cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia */ 82cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia private static final Pattern TIME_STAMPS = 83cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d [0-2]\\d$"); 84cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d"); 85cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia 86cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia /** 87372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * Pattern to check that brackets match. Opening brackets should be closed within a phone number. 88372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * This also checks that there is something inside the brackets. Having no brackets at all is also 89372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * fine. 
90372bff8dd464574d36737d47e495cad14346653cShaopeng Jia */ 91372bff8dd464574d36737d47e495cad14346653cShaopeng Jia private static final Pattern MATCHING_BRACKETS; 92372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 93372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /** 94372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * Matches white-space, which may indicate the end of a phone number and the start of something 95f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * else (such as a neighbouring zip-code). If white-space is found, continues to match all 96f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * characters that are not typically used to start a phone number. 97372bff8dd464574d36737d47e495cad14346653cShaopeng Jia */ 98f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia private static final Pattern GROUP_SEPARATOR; 99372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 100d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia /** 101d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia * Punctuation that may be at the start of a phone number - brackets and plus signs. 102d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia */ 103d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia private static final Pattern LEAD_CLASS; 104d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia 10552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia static { 106372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist 107372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * to make the pattern more easily understood. 
*/ 108372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 109372bff8dd464574d36737d47e495cad14346653cShaopeng Jia String openingParens = "(\\[\uFF08\uFF3B"; 110372bff8dd464574d36737d47e495cad14346653cShaopeng Jia String closingParens = ")\\]\uFF09\uFF3D"; 111372bff8dd464574d36737d47e495cad14346653cShaopeng Jia String nonParens = "[^" + openingParens + closingParens + "]"; 112372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 113372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /* Limit on the number of pairs of brackets in a phone number. */ 114372bff8dd464574d36737d47e495cad14346653cShaopeng Jia String bracketPairLimit = limit(0, 3); 115372bff8dd464574d36737d47e495cad14346653cShaopeng Jia /* 116372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * An opening bracket at the beginning may not be closed, but subsequent ones should be. It's 117372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a 118372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * closing bracket first. We limit the sets of brackets in a phone number to four. 119372bff8dd464574d36737d47e495cad14346653cShaopeng Jia */ 120372bff8dd464574d36737d47e495cad14346653cShaopeng Jia MATCHING_BRACKETS = Pattern.compile( 121372bff8dd464574d36737d47e495cad14346653cShaopeng Jia "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" + 122372bff8dd464574d36737d47e495cad14346653cShaopeng Jia nonParens + "+" + 123372bff8dd464574d36737d47e495cad14346653cShaopeng Jia "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit + 124372bff8dd464574d36737d47e495cad14346653cShaopeng Jia nonParens + "*"); 12552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 12652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* Limit on the number of leading (plus) characters. 
*/ 12752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String leadLimit = limit(0, 2); 12852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* Limit on the number of consecutive punctuation characters. */ 12952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String punctuationLimit = limit(0, 4); 13052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a 13152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * single block, set high enough to accommodate the entire national number and the international 13252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * country code. */ 13352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia int digitBlockLimit = 13452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE; 135d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some 13652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * formats use spaces to separate each digit. */ 13752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String blockLimit = limit(0, digitBlockLimit); 13852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 13952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* A punctuation sequence allowing white space. */ 14052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit; 14152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* A digits block without punctuation. 
*/ 14252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit); 143d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia 144f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS; 145f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia String leadClass = "[" + leadClassChars + "]"; 146d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia LEAD_CLASS = Pattern.compile(leadClass); 147f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia GROUP_SEPARATOR = Pattern.compile("\\p{Z}" + "[^" + leadClassChars + "\\p{Nd}]*"); 14852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 14952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /* Phone number pattern allowing optional punctuation. */ 15052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PATTERN = Pattern.compile( 15152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia "(?:" + leadClass + punctuation + ")" + leadLimit + 15252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit + 153f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?", 15452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberUtil.REGEX_FLAGS); 15552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 15652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 15752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** Returns a regular expression quantifier with an upper and lower limit. 
*/ 15852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private static String limit(int lower, int upper) { 15952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if ((lower < 0) || (upper <= 0) || (upper < lower)) { 16052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia throw new IllegalArgumentException(); 16152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 16252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return "{" + lower + "," + upper + "}"; 16352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 16452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 16552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The potential states of a PhoneNumberMatcher. */ 16652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private enum State { 16752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia NOT_READY, READY, DONE 16852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 16952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 17052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The phone number utility. */ 171d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia private final PhoneNumberUtil phoneUtil; 17252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The text searched for phone numbers. */ 17352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private final CharSequence text; 17452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 17552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * The region (country) to assume for phone numbers without an international prefix, possibly 17652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * null. 17752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 17852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private final String preferredRegion; 17952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The degree of validation requested. 
*/ 18052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private final Leniency leniency; 18152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The maximum number of retries after matching an invalid number. */ 18252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private long maxTries; 18352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 18452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The iteration tristate. */ 18552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private State state = State.NOT_READY; 18652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The last successful match, null unless in {@link State#READY}. */ 18752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch lastMatch = null; 18852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** The next index to start searching at. Undefined in {@link State#DONE}. */ 18952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private int searchIndex = 0; 19052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 19152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 19252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a 19352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * new instance. 19452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 19552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param util the phone number util to use 19652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param text the character sequence that we will search, null for no text 197f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * @param country the country to assume for phone numbers not written in international format 198f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * (with a leading plus, or with the international dialing prefix of the 199f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * specified region). 
May be null or "ZZ" if only numbers with a 200f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia * leading plus should be considered. 20152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param leniency the leniency to use when evaluating candidate phone numbers 20252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param maxTries the maximum number of invalid numbers to try before giving up on the text. 20352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * This is to cover degenerate cases where the text has a lot of false positives 20452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * in it. Must be {@code >= 0}. 20552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 20652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency, 20752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia long maxTries) { 20852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 20952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if ((util == null) || (leniency == null)) { 21052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia throw new NullPointerException(); 21152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 21252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if (maxTries < 0) { 21352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia throw new IllegalArgumentException(); 21452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 215d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia this.phoneUtil = util; 21652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia this.text = (text != null) ? 
text : ""; 21752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia this.preferredRegion = country; 21852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia this.leniency = leniency; 21952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia this.maxTries = maxTries; 22052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 22152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 22252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 22352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex} 22452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * that represents a phone number. Returns the next match, null if none was found. 22552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 22652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param index the search index to start searching at 22752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @return the phone number match found, null if none can be found 22852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 22952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch find(int index) { 23052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia Matcher matcher = PATTERN.matcher(text); 23152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia while ((maxTries > 0) && matcher.find(index)) { 23252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia int start = matcher.start(); 23352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia CharSequence candidate = text.subSequence(start, matcher.end()); 23452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 23552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // Check for extra numbers at the end. 23652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // TODO: This is the place to start when trying to support extraction of multiple phone number 23752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // from split notations (+41 79 123 45 67 / 68). 
23852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate); 23952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 24052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberMatch match = extractMatch(candidate, start); 24152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if (match != null) { 24252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return match; 24352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 24452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 24552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia index = start + candidate.length(); 24652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia maxTries--; 24752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 24852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 24952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return null; 25052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 25152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 25252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 25352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Trims away any characters after the first match of {@code pattern} in {@code candidate}, 25452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * returning the trimmed version. 
25552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 25652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) { 25752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia Matcher trailingCharsMatcher = pattern.matcher(candidate); 25852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if (trailingCharsMatcher.find()) { 25952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia candidate = candidate.subSequence(0, trailingCharsMatcher.start()); 26052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 26152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return candidate; 26252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 26352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 26452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 265d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia * Helper method to determine if a character is a Latin-script letter or not. For our purposes, 266d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia * combining marks should also return true since we assume they have been added to a preceding 267d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia * Latin character. 268d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia */ 269a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia // @VisibleForTesting 270d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia static boolean isLatinLetter(char letter) { 271d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia // Combining marks are a subset of non-spacing-mark. 
272d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) { 273d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia return false; 274d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia } 275d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia UnicodeBlock block = UnicodeBlock.of(letter); 276d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia return block.equals(UnicodeBlock.BASIC_LATIN) || 277d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) || 278d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.LATIN_EXTENDED_A) || 279d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) || 280d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.LATIN_EXTENDED_B) || 281d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS); 282d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia } 283d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia 28496a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia private static boolean isInvalidPunctuationSymbol(char character) { 28596a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL; 286f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 287f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia 288d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia /** 28952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Attempts to extract a match from a {@code candidate} character sequence. 
29052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 29152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param candidate the candidate text that might contain a phone number 29252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param offset the offset of {@code candidate} within {@link #text} 29352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @return the match found, null if none can be found 29452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 29552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) { 296372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // Skip a match that is more likely a publication page reference or a date. 297372bff8dd464574d36737d47e495cad14346653cShaopeng Jia if (PUB_PAGES.matcher(candidate).find() || SLASH_SEPARATED_DATES.matcher(candidate).find()) { 29852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return null; 29952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 300cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia // Skip potential time-stamps. 301cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia if (TIME_STAMPS.matcher(candidate).find()) { 302cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia String followingText = text.toString().substring(offset + candidate.length()); 303cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) { 304cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia return null; 305cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia } 306cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia } 30752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 30852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // Try to come up with a valid match given the entire candidate. 
30952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia String rawString = candidate.toString(); 31052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia PhoneNumberMatch match = parseAndVerify(rawString, offset); 31152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if (match != null) { 31252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return match; 31352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 31452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 315372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // If that failed, try to find an "inner match" - there might be a phone number within this 316372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // candidate. 31752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return extractInnerMatch(rawString, offset); 31852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 31952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 32052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 321372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a 322372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * match. 32352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 32452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param candidate the candidate text that might contain a phone number 325372bff8dd464574d36737d47e495cad14346653cShaopeng Jia * @param offset the current offset of {@code candidate} within {@link #text} 32652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @return the match found, null if none can be found 32752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 32852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch extractInnerMatch(String candidate, int offset) { 329372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // Try removing either the first or last "group" in the number and see if this gives a result. 
330d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia // We consider white space to be a possible indication of the start or end of the phone number. 331372bff8dd464574d36737d47e495cad14346653cShaopeng Jia Matcher groupMatcher = GROUP_SEPARATOR.matcher(candidate); 332372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 333372bff8dd464574d36737d47e495cad14346653cShaopeng Jia if (groupMatcher.find()) { 334f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // Try the first group by itself. 335f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia CharSequence firstGroupOnly = candidate.substring(0, groupMatcher.start()); 336f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia firstGroupOnly = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, 337f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia firstGroupOnly); 338f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia PhoneNumberMatch match = parseAndVerify(firstGroupOnly.toString(), offset); 339f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (match != null) { 340f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia return match; 341f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 342f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia maxTries--; 343f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia 344f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia int withoutFirstGroupStart = groupMatcher.end(); 345f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // Try the rest of the candidate without the first group. 
346f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia CharSequence withoutFirstGroup = candidate.substring(withoutFirstGroupStart); 347372bff8dd464574d36737d47e495cad14346653cShaopeng Jia withoutFirstGroup = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, 348372bff8dd464574d36737d47e495cad14346653cShaopeng Jia withoutFirstGroup); 349f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia match = parseAndVerify(withoutFirstGroup.toString(), offset + withoutFirstGroupStart); 35052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia if (match != null) { 35152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return match; 35252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 35352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia maxTries--; 354372bff8dd464574d36737d47e495cad14346653cShaopeng Jia 355372bff8dd464574d36737d47e495cad14346653cShaopeng Jia if (maxTries > 0) { 356f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia int lastGroupStart = withoutFirstGroupStart; 357372bff8dd464574d36737d47e495cad14346653cShaopeng Jia while (groupMatcher.find()) { 358372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // Find the last group. 359372bff8dd464574d36737d47e495cad14346653cShaopeng Jia lastGroupStart = groupMatcher.start(); 360372bff8dd464574d36737d47e495cad14346653cShaopeng Jia } 361372bff8dd464574d36737d47e495cad14346653cShaopeng Jia CharSequence withoutLastGroup = candidate.substring(0, lastGroupStart); 362372bff8dd464574d36737d47e495cad14346653cShaopeng Jia withoutLastGroup = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, 363372bff8dd464574d36737d47e495cad14346653cShaopeng Jia withoutLastGroup); 364f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (withoutLastGroup.equals(firstGroupOnly)) { 365f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // If there are only two groups, then the group "without the last group" is the same as 366f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // the first group. 
In these cases, we don't want to re-check the number group, so we exit 367f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // already. 368f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia return null; 369f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 370372bff8dd464574d36737d47e495cad14346653cShaopeng Jia match = parseAndVerify(withoutLastGroup.toString(), offset); 371372bff8dd464574d36737d47e495cad14346653cShaopeng Jia if (match != null) { 372372bff8dd464574d36737d47e495cad14346653cShaopeng Jia return match; 373372bff8dd464574d36737d47e495cad14346653cShaopeng Jia } 374372bff8dd464574d36737d47e495cad14346653cShaopeng Jia maxTries--; 375372bff8dd464574d36737d47e495cad14346653cShaopeng Jia } 37652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 37752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return null; 37852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 37952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 38052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 38152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and 38252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a 38352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null. 
38452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * 38552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param candidate the candidate match 38652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @param offset the offset of {@code candidate} within {@link #text} 38752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * @return the parsed and validated phone number match, or null 38852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 38952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia private PhoneNumberMatch parseAndVerify(String candidate, int offset) { 39052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia try { 391372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // Check the candidate doesn't contain any formatting which would indicate that it really 392372bff8dd464574d36737d47e495cad14346653cShaopeng Jia // isn't a phone number. 393372bff8dd464574d36737d47e495cad14346653cShaopeng Jia if (!MATCHING_BRACKETS.matcher(candidate).matches()) { 394372bff8dd464574d36737d47e495cad14346653cShaopeng Jia return null; 395372bff8dd464574d36737d47e495cad14346653cShaopeng Jia } 396f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia 397f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded 398f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def. 399f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (leniency.compareTo(Leniency.VALID) >= 0) { 400f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // If the candidate is not at the start of the text, and does not start with phone-number 401f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia // punctuation, check the previous character. 
402f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) { 403f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia char previousChar = text.charAt(offset - 1); 40496a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia // We return null if it is a latin letter or an invalid punctuation symbol. 40596a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) { 406f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia return null; 407f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 408f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 409f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia int lastCharIndex = offset + candidate.length(); 410f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (lastCharIndex < text.length()) { 411f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia char nextChar = text.charAt(lastCharIndex); 41296a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) { 413f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia return null; 414f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 415f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 416f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia } 417f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia 418a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion); 419f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia if (leniency.verify(number, candidate, phoneUtil)) { 420a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia // We used parseAndKeepRawInput to create this number, but for now we don't return the extra 421a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia // values parsed. 
TODO: stop clearing all values here and switch all users over 422a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia // to using rawInput() rather than the rawString() of PhoneNumberMatch. 423a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia number.clearCountryCodeSource(); 424a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia number.clearRawInput(); 425a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia number.clearPreferredDomesticCarrierCode(); 42652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return new PhoneNumberMatch(offset, candidate, number); 42752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 42852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } catch (NumberParseException e) { 42952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia // ignore and continue 43052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 43152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia return null; 43252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 43352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia 43452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia /** 435b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * Small helper interface such that the number groups can be checked according to different 4364b867acb917b73e699a596df94445c634c916519Shaopeng Jia * criteria, both for our default way of performing formatting and for any alternate formats we 4374b867acb917b73e699a596df94445c634c916519Shaopeng Jia * may want to check. 438b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia */ 439b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia interface NumberGroupingChecker { 440b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia /** 441b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * Returns true if the groups of digits found in our candidate phone number match our 442b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * expectations. 
443b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * 444b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * @param number the original number we found when parsing 445b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * @param normalizedCandidate the candidate number, normalized to only contain ASCII digits, 446b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * but with non-digits (spaces etc) retained 447b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * @param expectedNumberGroups the groups of digits that we would expect to see if we 448b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * formatted this number 449b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia */ 450b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia boolean checkGroups(PhoneNumberUtil util, PhoneNumber number, 451b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder normalizedCandidate, String[] expectedNumberGroups); 452b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 453b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 454b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util, 455b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumber number, 456b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder normalizedCandidate, 457b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String[] formattedNumberGroups) { 458b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia int fromIndex = 0; 459b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Check each group of consecutive digits are not broken into separate groupings in the 460b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // {@code normalizedCandidate} string. 
461b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia for (int i = 0; i < formattedNumberGroups.length; i++) { 462b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex} 463b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // doesn't contain the consecutive digits in formattedNumberGroups[i]. 464b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex); 465b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (fromIndex < 0) { 466b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return false; 467b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 468b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Moves {@code fromIndex} forward. 469b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia fromIndex += formattedNumberGroups[i].length(); 470b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (i == 0 && fromIndex < normalizedCandidate.length()) { 471b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // We are at the position right after the NDC. 472b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (Character.isDigit(normalizedCandidate.charAt(fromIndex))) { 473b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // This means there is no formatting symbol after the NDC. In this case, we only 474b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // accept the number if there is no formatting symbol at all in the number, except 475b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // for extensions. 
476b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String nationalSignificantNumber = util.getNationalSignificantNumber(number); 477b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length()) 478b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia .startsWith(nationalSignificantNumber); 479b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 480b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 481b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 482b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // The check here makes sure that we haven't mistakenly already used the extension to 483b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // match the last group of the subscriber number. Note the extension cannot have 484b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // formatting in-between digits. 485b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return normalizedCandidate.substring(fromIndex).contains(number.getExtension()); 486b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 487b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 488b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, 489b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumber number, 490b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder normalizedCandidate, 491b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String[] formattedNumberGroups) { 492b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String[] candidateGroups = 493b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString()); 494b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Set this to the last group, skipping it if the number has an extension. 
495b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia int candidateNumberGroupIndex = 496b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1; 497b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // First we check if the national significant number is formatted as a block. 498b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // We use contains and not equals, since the national significant number may be present with 499b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // a prefix such as a national number prefix, or the country code itself. 500b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (candidateGroups.length == 1 || 501b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia candidateGroups[candidateNumberGroupIndex].contains( 502b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia util.getNationalSignificantNumber(number))) { 503b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 504b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 505b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Starting from the end, go through in reverse, excluding the first group, and check the 506b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // candidate and number groups are the same. 
507b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1); 508b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0; 509b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia formattedNumberGroupIndex--, candidateNumberGroupIndex--) { 510b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (!candidateGroups[candidateNumberGroupIndex].equals( 511b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia formattedNumberGroups[formattedNumberGroupIndex])) { 512b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return false; 513b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 514b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 515b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Now check the first group. There may be a national prefix at the start, so we only check 516b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // that the candidate group ends with the formatted number group. 517b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return (candidateNumberGroupIndex >= 0 && 518b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0])); 519b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 520b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 521b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia /** 522b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * Helper method to get the national-number part of a number, formatted without any national 523b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia * prefix, and return it as a set of digit blocks that would be formatted together. 
524b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia */ 525b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, 526b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia NumberFormat formattingPattern) { 527b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (formattingPattern == null) { 528b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits. 529b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966); 530b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // We remove the extension part from the formatted string before splitting it into different 531b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // groups. 532b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia int endIndex = rfc3966Format.indexOf(';'); 533b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (endIndex < 0) { 534b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia endIndex = rfc3966Format.length(); 535b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 536b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // The country-code will have a '-' following it. 537b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia int startIndex = rfc3966Format.indexOf('-') + 1; 538b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return rfc3966Format.substring(startIndex, endIndex).split("-"); 539b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } else { 540b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // We format the NSN only, and split that according to the separator. 
541b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String nationalSignificantNumber = util.getNationalSignificantNumber(number); 542b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return util.formatNsnUsingPattern(nationalSignificantNumber, 543b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia formattingPattern, PhoneNumberFormat.RFC3966).split("-"); 544b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 545b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 546b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 547b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean checkNumberGroupingIsValid( 548b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) { 549b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions) 550b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // and optimise if necessary. 551b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder normalizedCandidate = 552b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */); 553b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String[] formattedNumberGroups = getNationalNumberGroups(util, number, null); 554b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 555b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 556b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 5574b867acb917b73e699a596df94445c634c916519Shaopeng Jia // If this didn't pass, see if there are any alternate formats, and try them instead. 
5584b867acb917b73e699a596df94445c634c916519Shaopeng Jia PhoneMetadata alternateFormats = 5594b867acb917b73e699a596df94445c634c916519Shaopeng Jia MetadataManager.getAlternateFormatsForCountry(number.getCountryCode()); 5604b867acb917b73e699a596df94445c634c916519Shaopeng Jia if (alternateFormats != null) { 5614b867acb917b73e699a596df94445c634c916519Shaopeng Jia for (NumberFormat alternateFormat : alternateFormats.numberFormats()) { 5624b867acb917b73e699a596df94445c634c916519Shaopeng Jia formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat); 5634b867acb917b73e699a596df94445c634c916519Shaopeng Jia if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 5644b867acb917b73e699a596df94445c634c916519Shaopeng Jia return true; 5654b867acb917b73e699a596df94445c634c916519Shaopeng Jia } 5664b867acb917b73e699a596df94445c634c916519Shaopeng Jia } 5674b867acb917b73e699a596df94445c634c916519Shaopeng Jia } 568b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return false; 569b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 570b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 571b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean containsMoreThanOneSlash(String candidate) { 572b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia int firstSlashIndex = candidate.indexOf('/'); 573b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return (firstSlashIndex > 0 && candidate.substring(firstSlashIndex + 1).contains("/")); 574b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 575b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 576b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean containsOnlyValidXChars( 577b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumber number, String candidate, PhoneNumberUtil util) { 578b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the 
579b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // national significant number or (2) an extension sign, in which case they always precede the 580b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // extension number. We assume a carrier code is more than 1 digit, so the first case has to 581b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x' 582b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // or 'X'. We ignore the character if it appears as the last character of the string. 583b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia for (int index = 0; index < candidate.length() - 1; index++) { 584b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia char charAtIndex = candidate.charAt(index); 585b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (charAtIndex == 'x' || charAtIndex == 'X') { 586b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia char charAtNextIndex = candidate.charAt(index + 1); 587b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (charAtNextIndex == 'x' || charAtNextIndex == 'X') { 588b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // This is the carrier code case, in which the 'X's always precede the national 589b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // significant number. 590b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia index++; 591b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) { 592b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return false; 593b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 594b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // This is the extension sign case, in which the 'x' or 'X' should always precede the 595b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // extension number. 
596b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals( 597b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia number.getExtension())) { 598b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return false; 599b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 600b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 601b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 602b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 603b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 604b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 605b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) { 606b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // First, check how we deduced the country code. If it was written in international format, then 607b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // the national prefix is not required. 608b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) { 609b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 610b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 611b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String phoneNumberRegion = 612b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia util.getRegionCodeForCountryCode(number.getCountryCode()); 613b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion); 614b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (metadata == null) { 615b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 616b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 617b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Check if a national prefix should be present when formatting this number. 
618b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String nationalNumber = util.getNationalSignificantNumber(number); 619b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia NumberFormat formatRule = 620b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber); 621b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // To do this, we check that a national prefix formatting rule was present and that it wasn't 622b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // just the first-group symbol ($1) with punctuation. 623b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) { 624b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (formatRule.isNationalPrefixOptionalWhenFormatting()) { 625b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // The national-prefix is optional in these cases, so we don't need to check if it was 626b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // present. 627b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 628b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 629b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Remove the first-group symbol. 630b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String candidateNationalPrefixRule = formatRule.getNationalPrefixFormattingRule(); 631b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // We assume that the first-group symbol will never be _before_ the national prefix. 
632b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia candidateNationalPrefixRule = 633b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia candidateNationalPrefixRule.substring(0, candidateNationalPrefixRule.indexOf("$1")); 634b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia candidateNationalPrefixRule = 635b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumberUtil.normalizeDigitsOnly(candidateNationalPrefixRule); 636b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (candidateNationalPrefixRule.length() == 0) { 637b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // National Prefix not needed for this number. 638b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 639b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 640b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Normalize the remainder. 641b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput()); 642b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia StringBuilder rawInput = new StringBuilder(rawInputCopy); 643b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Check if we found a national prefix and/or carrier code at the start of the raw input, and 644b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // return the result. 
645b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null); 646b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 647b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return true; 648b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 649b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 650b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia public boolean hasNext() { 651b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (state == State.NOT_READY) { 652b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia lastMatch = find(searchIndex); 653b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (lastMatch == null) { 654b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia state = State.DONE; 655b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } else { 656b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia searchIndex = lastMatch.end(); 657b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia state = State.READY; 658b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 659b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 660b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return state == State.READY; 661b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 662b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 663b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia public PhoneNumberMatch next() { 664b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Check the state and find the next match as a side-effect if necessary. 665b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia if (!hasNext()) { 666b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia throw new NoSuchElementException(); 667b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 668b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 669b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia // Don't retain that memory any longer than necessary. 
670b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia PhoneNumberMatch result = lastMatch; 671b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia lastMatch = null; 672b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia state = State.NOT_READY; 673b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia return result; 674b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia } 675b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia 676b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia /** 67752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Always throws {@link UnsupportedOperationException} as removal is not supported. 67852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */ 67952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia public void remove() { 68052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia throw new UnsupportedOperationException(); 68152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia } 68252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia} 683