152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia/*
2ca1e43d6e1fac07c7fc29c66c7da1fa9d7cf50f2Shaopeng Jia * Copyright (C) 2011 The Libphonenumber Authors
352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia *
452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Licensed under the Apache License, Version 2.0 (the "License");
552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * you may not use this file except in compliance with the License.
652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * You may obtain a copy of the License at
752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia *
852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * http://www.apache.org/licenses/LICENSE-2.0
952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia *
1052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Unless required by applicable law or agreed to in writing, software
1152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * distributed under the License is distributed on an "AS IS" BASIS,
1252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * See the License for the specific language governing permissions and
1452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * limitations under the License.
1552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */
1652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
17a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathpackage com.google.i18n.phonenumbers;
1852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
19a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.PhoneNumberUtil.Leniency;
20a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.PhoneNumberUtil.MatchType;
21a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat;
22a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.Phonemetadata.NumberFormat;
23a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
24a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource;
25a77faddfc3b3e4cca8f585c82d669054aec221f4Narayan Kamathimport com.google.i18n.phonenumbers.Phonenumber.PhoneNumber;
2652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
27d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jiaimport java.lang.Character.UnicodeBlock;
2852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiaimport java.util.Iterator;
2952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiaimport java.util.NoSuchElementException;
3052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiaimport java.util.regex.Matcher;
3152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiaimport java.util.regex.Pattern;
3252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
3352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia/**
3452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}.
3552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in
3652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * {@link PhoneNumberUtil}.
3752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia *
 * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt>)
 * are not found.
4052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia *
4152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia * <p>This class is not thread-safe.
4252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia */
4352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jiafinal class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
4452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /**
4552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * The phone number pattern used by {@link #find}, similar to
4652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences:
4752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * <ul>
4852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *   <li>All captures are limited in order to place an upper bound to the text matched by the
4952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *       pattern.
5052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * <ul>
5152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *   <li>Leading punctuation / plus signs are limited.
5252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *   <li>Consecutive occurrences of punctuation are limited.
5352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *   <li>Number of digits is limited.
5452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * </ul>
5552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *   <li>No whitespace is allowed at the start or end.
5652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *   <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported.
5752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * </ul>
5852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
5952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private static final Pattern PATTERN;
6052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /**
6152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * Matches strings that look like publication pages. Example:
6252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
6352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
6452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *
6552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * The string "211-227 (2003)" is not a telephone number.
6652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
6752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}");
6852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
69372bff8dd464574d36737d47e495cad14346653cShaopeng Jia  /**
70372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or
71372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   * 08/31/95.
72372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   */
73372bff8dd464574d36737d47e495cad14346653cShaopeng Jia  private static final Pattern SLASH_SEPARATED_DATES =
74372bff8dd464574d36737d47e495cad14346653cShaopeng Jia      Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}");
75372bff8dd464574d36737d47e495cad14346653cShaopeng Jia
76372bff8dd464574d36737d47e495cad14346653cShaopeng Jia  /**
77cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia   * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the
78cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia   * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX.
79cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia   */
80cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia  private static final Pattern TIME_STAMPS =
817a81979b0076f18b31b47b7df2beac29735f3a37Cecilia Roes      Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$");
82cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia  private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d");
83cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia
84cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia  /**
85372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
86372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   * This also checks that there is something inside the brackets. Having no brackets at all is also
87372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   * fine.
88372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   */
89372bff8dd464574d36737d47e495cad14346653cShaopeng Jia  private static final Pattern MATCHING_BRACKETS;
90372bff8dd464574d36737d47e495cad14346653cShaopeng Jia
91372bff8dd464574d36737d47e495cad14346653cShaopeng Jia  /**
92b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia   * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are
93b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia   * ordered according to specificity. For example, white-space is last since that is frequently
94b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia   * used in numbers, not just to separate two numbers. We have separate patterns since we don't
95b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia   * want to break up the phone-number-like text on more than one different kind of symbol at one
96b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia   * time, although symbols of the same type (e.g. space) can be safely grouped together.
97b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia   *
98b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia   * Note that if there is a match, we will always check any text found up to the first match as
99b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia   * well.
100372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   */
101b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia  private static final Pattern[] INNER_MATCHES = {
102b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // Breaks on the slash - e.g. "651-234-2345/332-445-1234"
103b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      Pattern.compile("/+(.*)"),
104b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // Note that the bracket here is inside the capturing group, since we consider it part of the
105b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321".
106b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      Pattern.compile("(\\([^(]*)"),
107b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
108b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // We require a space on either side of the hyphen for it to be considered a separator.
109b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      Pattern.compile("(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)"),
110b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's
111b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // possible that it's supposed to be used to break two numbers without spaces, and we haven't
112b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // seen many instances of it used within a number.
113b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      Pattern.compile("[\u2012-\u2015\uFF0D]\\p{Z}*(.+)"),
114b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
115b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      Pattern.compile("\\.+\\p{Z}*([^.]+)"),
116b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      // Breaks on space - e.g. "3324451234 8002341234"
117b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      Pattern.compile("\\p{Z}+(\\P{Z}+)")
118b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia  };
119372bff8dd464574d36737d47e495cad14346653cShaopeng Jia
120d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia  /**
121d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia   * Punctuation that may be at the start of a phone number - brackets and plus signs.
122d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia   */
123d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia  private static final Pattern LEAD_CLASS;
124d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia
12552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  static {
126372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist
127372bff8dd464574d36737d47e495cad14346653cShaopeng Jia     * to make the pattern more easily understood. */
128372bff8dd464574d36737d47e495cad14346653cShaopeng Jia
129372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    String openingParens = "(\\[\uFF08\uFF3B";
130372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    String closingParens = ")\\]\uFF09\uFF3D";
131372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    String nonParens = "[^" + openingParens + closingParens + "]";
132372bff8dd464574d36737d47e495cad14346653cShaopeng Jia
133372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    /* Limit on the number of pairs of brackets in a phone number. */
134372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    String bracketPairLimit = limit(0, 3);
135372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    /*
136372bff8dd464574d36737d47e495cad14346653cShaopeng Jia     * An opening bracket at the beginning may not be closed, but subsequent ones should be.  It's
137372bff8dd464574d36737d47e495cad14346653cShaopeng Jia     * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
138372bff8dd464574d36737d47e495cad14346653cShaopeng Jia     * closing bracket first. We limit the sets of brackets in a phone number to four.
139372bff8dd464574d36737d47e495cad14346653cShaopeng Jia     */
140372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    MATCHING_BRACKETS = Pattern.compile(
141372bff8dd464574d36737d47e495cad14346653cShaopeng Jia        "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" +
142372bff8dd464574d36737d47e495cad14346653cShaopeng Jia        nonParens + "+" +
143372bff8dd464574d36737d47e495cad14346653cShaopeng Jia        "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit +
144372bff8dd464574d36737d47e495cad14346653cShaopeng Jia        nonParens + "*");
14552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
14652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    /* Limit on the number of leading (plus) characters. */
14752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    String leadLimit = limit(0, 2);
14852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    /* Limit on the number of consecutive punctuation characters. */
14952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    String punctuationLimit = limit(0, 4);
15052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
15152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia     * single block, set high enough to accommodate the entire national number and the international
15252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia     * country code. */
15352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    int digitBlockLimit =
15452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia        PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE;
155d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia    /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some
15652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia     * formats use spaces to separate each digit. */
15752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    String blockLimit = limit(0, digitBlockLimit);
15852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
15952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    /* A punctuation sequence allowing white space. */
16052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit;
16152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    /* A digits block without punctuation. */
16252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit);
163d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia
164f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia    String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS;
165f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia    String leadClass = "[" + leadClassChars + "]";
166d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia    LEAD_CLASS = Pattern.compile(leadClass);
16752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
16852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    /* Phone number pattern allowing optional punctuation. */
16952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    PATTERN = Pattern.compile(
17052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia        "(?:" + leadClass + punctuation + ")" + leadLimit +
17152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia        digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit +
172f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia        "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?",
17352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia        PhoneNumberUtil.REGEX_FLAGS);
17452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
17552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
17652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /** Returns a regular expression quantifier with an upper and lower limit. */
17752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private static String limit(int lower, int upper) {
17852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    if ((lower < 0) || (upper <= 0) || (upper < lower)) {
17952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      throw new IllegalArgumentException();
18052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    }
18152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    return "{" + lower + "," + upper + "}";
18252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
18352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
18452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /** The potential states of a PhoneNumberMatcher. */
18552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private enum State {
18652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    NOT_READY, READY, DONE
18752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
18852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
18952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /** The phone number utility. */
190d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia  private final PhoneNumberUtil phoneUtil;
19152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /** The text searched for phone numbers. */
19252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private final CharSequence text;
19352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /**
19452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * The region (country) to assume for phone numbers without an international prefix, possibly
19552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * null.
19652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
19752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private final String preferredRegion;
19852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /** The degree of validation requested. */
19952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private final Leniency leniency;
20052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /** The maximum number of retries after matching an invalid number. */
20152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private long maxTries;
20252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
20352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /** The iteration tristate. */
20452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private State state = State.NOT_READY;
20552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /** The last successful match, null unless in {@link State#READY}. */
20652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private PhoneNumberMatch lastMatch = null;
20752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /** The next index to start searching at. Undefined in {@link State#DONE}. */
20852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private int searchIndex = 0;
20952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
21052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /**
21152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a
21252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * new instance.
21352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *
21452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param util      the phone number util to use
21552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param text      the character sequence that we will search, null for no text
216f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia   * @param country   the country to assume for phone numbers not written in international format
217f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia   *                  (with a leading plus, or with the international dialing prefix of the
218f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia   *                  specified region). May be null or "ZZ" if only numbers with a
219f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia   *                  leading plus should be considered.
22052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param leniency  the leniency to use when evaluating candidate phone numbers
22152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param maxTries  the maximum number of invalid numbers to try before giving up on the text.
22252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *                  This is to cover degenerate cases where the text has a lot of false positives
22352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *                  in it. Must be {@code >= 0}.
22452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
22552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency,
22652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      long maxTries) {
22752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
22852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    if ((util == null) || (leniency == null)) {
22952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      throw new NullPointerException();
23052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    }
23152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    if (maxTries < 0) {
23252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      throw new IllegalArgumentException();
23352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    }
234d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia    this.phoneUtil = util;
23552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    this.text = (text != null) ? text : "";
23652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    this.preferredRegion = country;
23752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    this.leniency = leniency;
23852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    this.maxTries = maxTries;
23952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
24052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
24152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /**
24252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex}
24352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * that represents a phone number. Returns the next match, null if none was found.
24452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *
24552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param index  the search index to start searching at
24652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @return  the phone number match found, null if none can be found
24752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
24852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private PhoneNumberMatch find(int index) {
24952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    Matcher matcher = PATTERN.matcher(text);
25052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    while ((maxTries > 0) && matcher.find(index)) {
25152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      int start = matcher.start();
25252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      CharSequence candidate = text.subSequence(start, matcher.end());
25352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
25452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      // Check for extra numbers at the end.
25552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      // TODO: This is the place to start when trying to support extraction of multiple phone number
25652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      // from split notations (+41 79 123 45 67 / 68).
25752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate);
25852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
25952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      PhoneNumberMatch match = extractMatch(candidate, start);
26052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      if (match != null) {
26152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia        return match;
26252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      }
26352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
26452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      index = start + candidate.length();
26552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      maxTries--;
26652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    }
26752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
26852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    return null;
26952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
27052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
27152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /**
27252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * Trims away any characters after the first match of {@code pattern} in {@code candidate},
27352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * returning the trimmed version.
27452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
27552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) {
27652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    Matcher trailingCharsMatcher = pattern.matcher(candidate);
27752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    if (trailingCharsMatcher.find()) {
27852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      candidate = candidate.subSequence(0, trailingCharsMatcher.start());
27952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    }
28052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    return candidate;
28152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
28252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
28352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /**
284d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia   * Helper method to determine if a character is a Latin-script letter or not. For our purposes,
285d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia   * combining marks should also return true since we assume they have been added to a preceding
286d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia   * Latin character.
287d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia   */
288a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia  // @VisibleForTesting
289d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia  static boolean isLatinLetter(char letter) {
290d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia    // Combining marks are a subset of non-spacing-mark.
291d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia    if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
292d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia      return false;
293d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia    }
294d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia    UnicodeBlock block = UnicodeBlock.of(letter);
295d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia    return block.equals(UnicodeBlock.BASIC_LATIN) ||
296d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia        block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) ||
297d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia        block.equals(UnicodeBlock.LATIN_EXTENDED_A) ||
298d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia        block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) ||
299d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia        block.equals(UnicodeBlock.LATIN_EXTENDED_B) ||
300d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia        block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
301d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia  }
302d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia
30396a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia  private static boolean isInvalidPunctuationSymbol(char character) {
30496a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia    return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL;
305f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia  }
306f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia
307d1db4c508088ac27cb0815a222cf2600f16ad5b9Shaopeng Jia  /**
30852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * Attempts to extract a match from a {@code candidate} character sequence.
30952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *
31052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param candidate  the candidate text that might contain a phone number
31152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param offset  the offset of {@code candidate} within {@link #text}
31252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @return  the match found, null if none can be found
31352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
31452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) {
315b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia    // Skip a match that is more likely to be a date.
316b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia    if (SLASH_SEPARATED_DATES.matcher(candidate).find()) {
31752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      return null;
31852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    }
319b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia
320cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia    // Skip potential time-stamps.
321cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia    if (TIME_STAMPS.matcher(candidate).find()) {
322cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia      String followingText = text.toString().substring(offset + candidate.length());
323cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia      if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) {
324cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia        return null;
325cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia      }
326cc0ae7326023c5da91dd9a84035c3dc3f6cc372fShaopeng Jia    }
32752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
32852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    // Try to come up with a valid match given the entire candidate.
32952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    String rawString = candidate.toString();
33052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    PhoneNumberMatch match = parseAndVerify(rawString, offset);
33152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    if (match != null) {
33252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      return match;
33352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    }
33452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
335372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    // If that failed, try to find an "inner match" - there might be a phone number within this
336372bff8dd464574d36737d47e495cad14346653cShaopeng Jia    // candidate.
33752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    return extractInnerMatch(rawString, offset);
33852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
33952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
34052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /**
341372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a
342372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   * match.
34352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *
34452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param candidate  the candidate text that might contain a phone number
345372bff8dd464574d36737d47e495cad14346653cShaopeng Jia   * @param offset  the current offset of {@code candidate} within {@link #text}
34652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @return  the match found, null if none can be found
34752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
34852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private PhoneNumberMatch extractInnerMatch(String candidate, int offset) {
349b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia    for (Pattern possibleInnerMatch : INNER_MATCHES) {
350b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      Matcher groupMatcher = possibleInnerMatch.matcher(candidate);
351b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      boolean isFirstMatch = true;
352b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      while (groupMatcher.find() && maxTries > 0) {
353b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia        if (isFirstMatch) {
354b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia          // We should handle any group before this one too.
355b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia          CharSequence group = trimAfterFirstMatch(
356b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia              PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
357b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia              candidate.substring(0, groupMatcher.start()));
358b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia          PhoneNumberMatch match = parseAndVerify(group.toString(), offset);
359b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia          if (match != null) {
360b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia            return match;
361b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia          }
362b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia          maxTries--;
363b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia          isFirstMatch = false;
364f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia        }
365b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia        CharSequence group = trimAfterFirstMatch(
366b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia            PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, groupMatcher.group(1));
367b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia        PhoneNumberMatch match = parseAndVerify(group.toString(), offset + groupMatcher.start(1));
368372bff8dd464574d36737d47e495cad14346653cShaopeng Jia        if (match != null) {
369372bff8dd464574d36737d47e495cad14346653cShaopeng Jia          return match;
370372bff8dd464574d36737d47e495cad14346653cShaopeng Jia        }
371372bff8dd464574d36737d47e495cad14346653cShaopeng Jia        maxTries--;
372372bff8dd464574d36737d47e495cad14346653cShaopeng Jia      }
37352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    }
37452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    return null;
37552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
37652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
37752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  /**
37852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and
37952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a
38052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null.
38152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   *
38252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param candidate  the candidate match
38352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @param offset  the offset of {@code candidate} within {@link #text}
38452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * @return  the parsed and validated phone number match, or null
38552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
38652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  private PhoneNumberMatch parseAndVerify(String candidate, int offset) {
38752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    try {
388372bff8dd464574d36737d47e495cad14346653cShaopeng Jia      // Check the candidate doesn't contain any formatting which would indicate that it really
389372bff8dd464574d36737d47e495cad14346653cShaopeng Jia      // isn't a phone number.
390b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia      if (!MATCHING_BRACKETS.matcher(candidate).matches() || PUB_PAGES.matcher(candidate).find()) {
391372bff8dd464574d36737d47e495cad14346653cShaopeng Jia        return null;
392372bff8dd464574d36737d47e495cad14346653cShaopeng Jia      }
393f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia
394f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia      // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
395f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia      // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
396f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia      if (leniency.compareTo(Leniency.VALID) >= 0) {
397f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia        // If the candidate is not at the start of the text, and does not start with phone-number
398f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia        // punctuation, check the previous character.
399f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia        if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) {
400f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia          char previousChar = text.charAt(offset - 1);
40196a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia          // We return null if it is a latin letter or an invalid punctuation symbol.
40296a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia          if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
403f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia            return null;
404f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia          }
405f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia        }
406f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia        int lastCharIndex = offset + candidate.length();
407f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia        if (lastCharIndex < text.length()) {
408f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia          char nextChar = text.charAt(lastCharIndex);
40996a7214cdabf08f53f5b1a560304601c9f65e0b9Shaopeng Jia          if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
410f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia            return null;
411f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia          }
412f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia        }
413f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia      }
414f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia
415a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia      PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);
416b03a14984ad9b0d8b88337ca714cb831233b99c3Shaopeng Jia
417f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // Check Israel * numbers: these are a special case in that they are four-digit numbers that
418f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // our library supports, but they can only be dialled with a leading *. Since we don't
419f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // actually store or detect the * in our phone number library, this means in practice we
420f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // detect most four digit numbers as being valid for Israel. We are considering moving these
421f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // numbers to ShortNumberInfo instead, in which case this problem would go away, but in the
422f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // meantime we want to restrict the false matches so we only allow these numbers if they are
423f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // preceded by a star. We enforce this for all leniency levels even though these numbers are
424f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // technically accepted by isPossibleNumber and isValidNumber since we consider it to be a
425f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // deficiency in those methods that they accept these numbers without the *.
426f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // TODO: Remove this or make it significantly less hacky once we've decided how to
427f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // handle these short codes going forward in ShortNumberInfo. We could use the formatting
428f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // rules for instance, but that would be slower.
429f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      if (phoneUtil.getRegionCodeForCountryCode(number.getCountryCode()).equals("IL") &&
430f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia          phoneUtil.getNationalSignificantNumber(number).length() == 4 &&
431f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia          (offset == 0 || (offset > 0 && text.charAt(offset - 1) != '*'))) {
432f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia        // No match.
433f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia        return null;
434f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      }
435f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia
436f7e0224b862054893f28d2736b3f6804d9935886Shaopeng Jia      if (leniency.verify(number, candidate, phoneUtil)) {
437a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia        // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
438a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia        // values parsed. TODO: stop clearing all values here and switch all users over
439a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia        // to using rawInput() rather than the rawString() of PhoneNumberMatch.
440a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia        number.clearCountryCodeSource();
441a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia        number.clearRawInput();
442a48b2d221ba46df7446d1a87244efa985e8e292fShaopeng Jia        number.clearPreferredDomesticCarrierCode();
44352699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia        return new PhoneNumberMatch(offset, candidate, number);
44452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      }
44552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    } catch (NumberParseException e) {
44652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia      // ignore and continue
44752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    }
44852699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    return null;
44952699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
45052699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia
  /**
   * Small helper interface such that the number groups can be checked according to different
   * criteria, both for our default way of performing formatting and for any alternate formats we
   * may want to check.
   */
  interface NumberGroupingChecker {
    /**
     * Returns true if the groups of digits found in our candidate phone number match our
     * expectations.
     *
     * @param util  the {@link PhoneNumberUtil} instance the implementation may consult while
     *     checking
     * @param number  the original number we found when parsing
     * @param normalizedCandidate  the candidate number, normalized to only contain ASCII digits,
     *     but with non-digits (spaces etc) retained
     * @param expectedNumberGroups  the groups of digits that we would expect to see if we
     *     formatted this number
     * @return  true if the candidate's digit groups satisfy this checker's criteria, false
     *     otherwise
     */
    boolean checkGroups(PhoneNumberUtil util, PhoneNumber number,
                        StringBuilder normalizedCandidate, String[] expectedNumberGroups);
  }
470b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
471b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util,
472b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia                                              PhoneNumber number,
473b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia                                              StringBuilder normalizedCandidate,
474b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia                                              String[] formattedNumberGroups) {
475b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    int fromIndex = 0;
476f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia    if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
477f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      // First skip the country code if the normalized candidate contained it.
478f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      String countryCode = Integer.toString(number.getCountryCode());
479f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia      fromIndex = normalizedCandidate.indexOf(countryCode) + countryCode.length();
480f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia    }
481b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // Check each group of consecutive digits are not broken into separate groupings in the
482b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // {@code normalizedCandidate} string.
483b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    for (int i = 0; i < formattedNumberGroups.length; i++) {
484b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex}
485b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // doesn't contain the consecutive digits in formattedNumberGroups[i].
486b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex);
487b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      if (fromIndex < 0) {
488b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        return false;
489b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      }
490b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // Moves {@code fromIndex} forward.
491b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      fromIndex += formattedNumberGroups[i].length();
492b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      if (i == 0 && fromIndex < normalizedCandidate.length()) {
493bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia        // We are at the position right after the NDC. We get the region used for formatting
494bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia        // information based on the country code in the phone number, rather than the number itself,
495bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia        // as we do not need to distinguish between different countries with the same country
496bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia        // calling code and this is faster.
497bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia        String region = util.getRegionCodeForCountryCode(number.getCountryCode());
498bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia        if (util.getNddPrefixForRegion(region, true) != null &&
499bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia            Character.isDigit(normalizedCandidate.charAt(fromIndex))) {
500b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          // This means there is no formatting symbol after the NDC. In this case, we only
501b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          // accept the number if there is no formatting symbol at all in the number, except
502bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia          // for extensions. This is only important for countries with national prefixes.
503b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          String nationalSignificantNumber = util.getNationalSignificantNumber(number);
504b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length())
505b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia              .startsWith(nationalSignificantNumber);
506b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        }
507b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      }
508b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
509b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // The check here makes sure that we haven't mistakenly already used the extension to
510b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // match the last group of the subscriber number. Note the extension cannot have
511b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // formatting in-between digits.
512b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    return normalizedCandidate.substring(fromIndex).contains(number.getExtension());
513b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  }
514b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
515b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util,
516b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia                                                  PhoneNumber number,
517b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia                                                  StringBuilder normalizedCandidate,
518b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia                                                  String[] formattedNumberGroups) {
519b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    String[] candidateGroups =
520b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString());
521b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // Set this to the last group, skipping it if the number has an extension.
522b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    int candidateNumberGroupIndex =
523b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1;
524b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // First we check if the national significant number is formatted as a block.
525b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // We use contains and not equals, since the national significant number may be present with
526b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // a prefix such as a national number prefix, or the country code itself.
527b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    if (candidateGroups.length == 1 ||
528b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        candidateGroups[candidateNumberGroupIndex].contains(
529b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia            util.getNationalSignificantNumber(number))) {
530b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      return true;
531b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
532b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // Starting from the end, go through in reverse, excluding the first group, and check the
533b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // candidate and number groups are the same.
534b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1);
535b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia         formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0;
536b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia         formattedNumberGroupIndex--, candidateNumberGroupIndex--) {
537b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      if (!candidateGroups[candidateNumberGroupIndex].equals(
538b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          formattedNumberGroups[formattedNumberGroupIndex])) {
539b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        return false;
540b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      }
541b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
542b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // Now check the first group. There may be a national prefix at the start, so we only check
543b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // that the candidate group ends with the formatted number group.
544b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    return (candidateNumberGroupIndex >= 0 &&
545b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia            candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0]));
546b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  }
547b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
548b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  /**
549b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia   * Helper method to get the national-number part of a number, formatted without any national
550b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia   * prefix, and return it as a set of digit blocks that would be formatted together.
551b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia   */
552b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number,
553b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia                                                  NumberFormat formattingPattern) {
554b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    if (formattingPattern == null) {
555b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits.
556b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
557b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // We remove the extension part from the formatted string before splitting it into different
558b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // groups.
559b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      int endIndex = rfc3966Format.indexOf(';');
560b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      if (endIndex < 0) {
561b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        endIndex = rfc3966Format.length();
562b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      }
563b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // The country-code will have a '-' following it.
564b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      int startIndex = rfc3966Format.indexOf('-') + 1;
565b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      return rfc3966Format.substring(startIndex, endIndex).split("-");
566b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    } else {
567b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // We format the NSN only, and split that according to the separator.
568b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      String nationalSignificantNumber = util.getNationalSignificantNumber(number);
569b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      return util.formatNsnUsingPattern(nationalSignificantNumber,
570b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia                                        formattingPattern, PhoneNumberFormat.RFC3966).split("-");
571b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
572b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  }
573b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
574b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  static boolean checkNumberGroupingIsValid(
575b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) {
576b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions)
577b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // and optimise if necessary.
578b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    StringBuilder normalizedCandidate =
579b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */);
580b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    String[] formattedNumberGroups = getNationalNumberGroups(util, number, null);
581b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
582b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      return true;
583b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
5844b867acb917b73e699a596df94445c634c916519Shaopeng Jia    // If this didn't pass, see if there are any alternate formats, and try them instead.
5854b867acb917b73e699a596df94445c634c916519Shaopeng Jia    PhoneMetadata alternateFormats =
5864b867acb917b73e699a596df94445c634c916519Shaopeng Jia        MetadataManager.getAlternateFormatsForCountry(number.getCountryCode());
5874b867acb917b73e699a596df94445c634c916519Shaopeng Jia    if (alternateFormats != null) {
5884b867acb917b73e699a596df94445c634c916519Shaopeng Jia      for (NumberFormat alternateFormat : alternateFormats.numberFormats()) {
5894b867acb917b73e699a596df94445c634c916519Shaopeng Jia        formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
5904b867acb917b73e699a596df94445c634c916519Shaopeng Jia        if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
5914b867acb917b73e699a596df94445c634c916519Shaopeng Jia          return true;
5924b867acb917b73e699a596df94445c634c916519Shaopeng Jia        }
5934b867acb917b73e699a596df94445c634c916519Shaopeng Jia      }
5944b867acb917b73e699a596df94445c634c916519Shaopeng Jia    }
595b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    return false;
596b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  }
597b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
598bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia  static boolean containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate) {
599bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    int firstSlashInBodyIndex = candidate.indexOf('/');
600bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    if (firstSlashInBodyIndex < 0) {
601bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia      // No slashes, this is okay.
602bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia      return false;
603bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    }
604bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    // Now look for a second one.
605bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    int secondSlashInBodyIndex = candidate.indexOf('/', firstSlashInBodyIndex + 1);
606bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    if (secondSlashInBodyIndex < 0) {
607bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia      // Only one slash, this is okay.
608bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia      return false;
609bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    }
610bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia
611bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    // If the first slash is after the country calling code, this is permitted.
612bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    boolean candidateHasCountryCode =
613bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia        (number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN ||
614bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia         number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN);
615bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    if (candidateHasCountryCode &&
616bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia        PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex))
617bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia            .equals(Integer.toString(number.getCountryCode()))) {
618bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia      // Any more slashes and this is illegal.
619bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia      return candidate.substring(secondSlashInBodyIndex + 1).contains("/");
620bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    }
621bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia    return true;
622b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  }
623b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
624b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  static boolean containsOnlyValidXChars(
625b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      PhoneNumber number, String candidate, PhoneNumberUtil util) {
626b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the
627b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // national significant number or (2) an extension sign, in which case they always precede the
628b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // extension number. We assume a carrier code is more than 1 digit, so the first case has to
629b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x'
630b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // or 'X'. We ignore the character if it appears as the last character of the string.
631b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    for (int index = 0; index < candidate.length() - 1; index++) {
632b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      char charAtIndex = candidate.charAt(index);
633b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      if (charAtIndex == 'x' || charAtIndex == 'X') {
634b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        char charAtNextIndex = candidate.charAt(index + 1);
635b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        if (charAtNextIndex == 'x' || charAtNextIndex == 'X') {
636b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          // This is the carrier code case, in which the 'X's always precede the national
637b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          // significant number.
638b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          index++;
639b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) {
640b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia            return false;
641b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia          }
642b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        // This is the extension sign case, in which the 'x' or 'X' should always precede the
643b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        // extension number.
644b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals(
645b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia            number.getExtension())) {
646f9768eb3c8f303725fb4f899598481cbc4fb76a3Shaopeng Jia          return false;
647b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        }
648b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      }
649b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
650b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    return true;
651b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  }
652b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
653b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) {
654b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // First, check how we deduced the country code. If it was written in international format, then
655b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // the national prefix is not required.
656b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
657b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      return true;
658b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
659b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    String phoneNumberRegion =
660b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        util.getRegionCodeForCountryCode(number.getCountryCode());
661b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion);
662b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    if (metadata == null) {
663b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      return true;
664b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
665b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // Check if a national prefix should be present when formatting this number.
666b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    String nationalNumber = util.getNationalSignificantNumber(number);
667b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    NumberFormat formatRule =
668b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber);
669b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // To do this, we check that a national prefix formatting rule was present and that it wasn't
670b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // just the first-group symbol ($1) with punctuation.
671b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) {
672b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      if (formatRule.isNationalPrefixOptionalWhenFormatting()) {
673b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        // The national-prefix is optional in these cases, so we don't need to check if it was
674b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        // present.
675b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        return true;
676b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      }
677bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia      if (PhoneNumberUtil.formattingRuleHasFirstGroupOnly(
678bb78ce92eae2d5de9aa06b27ed3b87bc496c79eeShaopeng Jia          formatRule.getNationalPrefixFormattingRule())) {
679b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        // National Prefix not needed for this number.
680b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        return true;
681b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      }
682b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // Normalize the remainder.
683b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput());
684b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      StringBuilder rawInput = new StringBuilder(rawInputCopy);
685b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // Check if we found a national prefix and/or carrier code at the start of the raw input, and
686b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      // return the result.
687b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null);
688b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
689b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    return true;
690b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  }
691b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
692b0d1cb1662daab3451e5fa59b8f073e9992b628bAndy Staudacher  @Override
693b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  public boolean hasNext() {
694b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    if (state == State.NOT_READY) {
695b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      lastMatch = find(searchIndex);
696b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      if (lastMatch == null) {
697b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        state = State.DONE;
698b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      } else {
699b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        searchIndex = lastMatch.end();
700b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia        state = State.READY;
701b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      }
702b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
703b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    return state == State.READY;
704b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  }
705b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
706b0d1cb1662daab3451e5fa59b8f073e9992b628bAndy Staudacher  @Override
707b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  public PhoneNumberMatch next() {
708b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // Check the state and find the next match as a side-effect if necessary.
709b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    if (!hasNext()) {
710b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia      throw new NoSuchElementException();
711b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    }
712b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
713b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    // Don't retain that memory any longer than necessary.
714b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    PhoneNumberMatch result = lastMatch;
715b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    lastMatch = null;
716b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    state = State.NOT_READY;
717b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia    return result;
718b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  }
719b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia
720b210e301245cb5dc90aa3142f378632bd41cb172Shaopeng Jia  /**
72152699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   * Always throws {@link UnsupportedOperationException} as removal is not supported.
72252699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia   */
723b0d1cb1662daab3451e5fa59b8f073e9992b628bAndy Staudacher  @Override
72452699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  public void remove() {
72552699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia    throw new UnsupportedOperationException();
72652699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia  }
72752699e08e855fb0957944b4e73358ad9e0007c0cShaopeng Jia}
728