1/*
2 * Copyright (C) 2011 The Libphonenumber Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.google.i18n.phonenumbers;
18
19import com.google.i18n.phonenumbers.PhoneNumberUtil.Leniency;
20import com.google.i18n.phonenumbers.PhoneNumberUtil.MatchType;
21import com.google.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat;
22import com.google.i18n.phonenumbers.Phonemetadata.NumberFormat;
23import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
24import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource;
25import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber;
26
27import java.lang.Character.UnicodeBlock;
28import java.util.Iterator;
29import java.util.NoSuchElementException;
30import java.util.regex.Matcher;
31import java.util.regex.Pattern;
32
33/**
34 * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}.
35 * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in
36 * {@link PhoneNumberUtil}.
37 *
 * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt>) are
 * not found.
40 *
41 * <p>This class is not thread-safe.
42 */
43final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
  /**
   * The phone number pattern used by {@link #find}, similar to
   * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences:
   * <ul>
   *   <li>All captures are limited in order to place an upper bound to the text matched by the
   *       pattern:
   *     <ul>
   *       <li>Leading punctuation / plus signs are limited.
   *       <li>Consecutive occurrences of punctuation are limited.
   *       <li>Number of digits is limited.
   *     </ul>
   *   <li>No whitespace is allowed at the start or end.
   *   <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported.
   * </ul>
   *
   * <p>The expression itself is assembled from smaller fragments in the static initializer of this
   * class.
   */
  private static final Pattern PATTERN;
  /**
   * Matches strings that look like publication pages. Example:
   * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
   * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
   *
   * The string "211-227 (2003)" is not a telephone number.
   */
  private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}");

  /**
   * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or
   * 08/31/95.
   */
  private static final Pattern SLASH_SEPARATED_DATES =
      Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}");

  /**
   * Matches the date-and-hour part of a timestamp at the end of a candidate. Example: the
   * "2012-01-02 08" of "2012-01-02 08:00". Note that the reg-ex does not include the trailing
   * ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX.
   */
  private static final Pattern TIME_STAMPS =
      Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$");
  /**
   * Matches the ":mm" minutes part of a timestamp. It is applied to the text that directly follows
   * a candidate matched by {@link #TIME_STAMPS}; only when both match is the candidate discarded as
   * a timestamp.
   */
  private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d");

  /**
   * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
   * This also checks that there is something inside the brackets. Having no brackets at all is also
   * fine.
   *
   * <p>Assigned in the static initializer of this class.
   */
  private static final Pattern MATCHING_BRACKETS;
90
  /**
   * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are
   * ordered according to specificity. For example, white-space is last since that is frequently
   * used in numbers, not just to separate two numbers. We have separate patterns since we don't
   * want to break up the phone-number-like text on more than one different kind of symbol at one
   * time, although symbols of the same type (e.g. space) can be safely grouped together.
   *
   * <p>Note that if there is a match, we will always check any text found up to the first match as
   * well.
   *
   * <p>Every pattern has exactly one capturing group, which holds the text that is tried as a
   * phone number.
   */
  private static final Pattern[] INNER_MATCHES = {
      // Breaks on the slash - e.g. "651-234-2345/332-445-1234"
      Pattern.compile("/+(.*)"),
      // Note that the bracket here is inside the capturing group, since we consider it part of the
      // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321".
      Pattern.compile("(\\([^(]*)"),
      // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
      // We require a space on either side of the hyphen for it to be considered a separator.
      Pattern.compile("(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)"),
      // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's
      // possible that it's supposed to be used to break two numbers without spaces, and we haven't
      // seen many instances of it used within a number.
      Pattern.compile("[\u2012-\u2015\uFF0D]\\p{Z}*(.+)"),
      // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
      Pattern.compile("\\.+\\p{Z}*([^.]+)"),
      // Breaks on space - e.g. "3324451234 8002341234"
      Pattern.compile("\\p{Z}+(\\P{Z}+)")
  };

  /**
   * Punctuation that may be at the start of a phone number - brackets and plus signs. Matches a
   * single such character; assigned in the static initializer of this class.
   */
  private static final Pattern LEAD_CLASS;
124
125  static {
126    /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist
127     * to make the pattern more easily understood. */
128
129    String openingParens = "(\\[\uFF08\uFF3B";
130    String closingParens = ")\\]\uFF09\uFF3D";
131    String nonParens = "[^" + openingParens + closingParens + "]";
132
133    /* Limit on the number of pairs of brackets in a phone number. */
134    String bracketPairLimit = limit(0, 3);
135    /*
136     * An opening bracket at the beginning may not be closed, but subsequent ones should be.  It's
137     * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
138     * closing bracket first. We limit the sets of brackets in a phone number to four.
139     */
140    MATCHING_BRACKETS = Pattern.compile(
141        "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" +
142        nonParens + "+" +
143        "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit +
144        nonParens + "*");
145
146    /* Limit on the number of leading (plus) characters. */
147    String leadLimit = limit(0, 2);
148    /* Limit on the number of consecutive punctuation characters. */
149    String punctuationLimit = limit(0, 4);
150    /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
151     * single block, set high enough to accommodate the entire national number and the international
152     * country code. */
153    int digitBlockLimit =
154        PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE;
155    /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some
156     * formats use spaces to separate each digit. */
157    String blockLimit = limit(0, digitBlockLimit);
158
159    /* A punctuation sequence allowing white space. */
160    String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit;
161    /* A digits block without punctuation. */
162    String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit);
163
164    String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS;
165    String leadClass = "[" + leadClassChars + "]";
166    LEAD_CLASS = Pattern.compile(leadClass);
167
168    /* Phone number pattern allowing optional punctuation. */
169    PATTERN = Pattern.compile(
170        "(?:" + leadClass + punctuation + ")" + leadLimit +
171        digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit +
172        "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?",
173        PhoneNumberUtil.REGEX_FLAGS);
174  }
175
176  /** Returns a regular expression quantifier with an upper and lower limit. */
177  private static String limit(int lower, int upper) {
178    if ((lower < 0) || (upper <= 0) || (upper < lower)) {
179      throw new IllegalArgumentException();
180    }
181    return "{" + lower + "," + upper + "}";
182  }
183
  /** The potential states of a PhoneNumberMatcher. */
  private enum State {
    /*
     * NOT_READY: no match is buffered; hasNext() has to run a search first.
     * READY:     lastMatch holds a match that has not been handed out by next() yet.
     * DONE:      the last search found nothing; hasNext() returns false from now on.
     */
    NOT_READY, READY, DONE
  }

  /** The phone number utility. */
  private final PhoneNumberUtil phoneUtil;
  /** The text searched for phone numbers. */
  private final CharSequence text;
  /**
   * The region (country) to assume for phone numbers without an international prefix, possibly
   * null.
   */
  private final String preferredRegion;
  /** The degree of validation requested. */
  private final Leniency leniency;
  /**
   * The maximum number of retries after matching an invalid number. This is a remaining budget: it
   * is decremented whenever a candidate fails to yield a match, which is why it is not final.
   */
  private long maxTries;

  /** The iteration tristate. */
  private State state = State.NOT_READY;
  /** The last successful match, null unless in {@link State#READY}. */
  private PhoneNumberMatch lastMatch = null;
  /** The next index to start searching at. Undefined in {@link State#DONE}. */
  private int searchIndex = 0;
209
210  /**
211   * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a
212   * new instance.
213   *
214   * @param util      the phone number util to use
215   * @param text      the character sequence that we will search, null for no text
216   * @param country   the country to assume for phone numbers not written in international format
217   *                  (with a leading plus, or with the international dialing prefix of the
218   *                  specified region). May be null or "ZZ" if only numbers with a
219   *                  leading plus should be considered.
220   * @param leniency  the leniency to use when evaluating candidate phone numbers
221   * @param maxTries  the maximum number of invalid numbers to try before giving up on the text.
222   *                  This is to cover degenerate cases where the text has a lot of false positives
223   *                  in it. Must be {@code >= 0}.
224   */
225  PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency,
226      long maxTries) {
227
228    if ((util == null) || (leniency == null)) {
229      throw new NullPointerException();
230    }
231    if (maxTries < 0) {
232      throw new IllegalArgumentException();
233    }
234    this.phoneUtil = util;
235    this.text = (text != null) ? text : "";
236    this.preferredRegion = country;
237    this.leniency = leniency;
238    this.maxTries = maxTries;
239  }
240
241  /**
242   * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex}
243   * that represents a phone number. Returns the next match, null if none was found.
244   *
245   * @param index  the search index to start searching at
246   * @return  the phone number match found, null if none can be found
247   */
248  private PhoneNumberMatch find(int index) {
249    Matcher matcher = PATTERN.matcher(text);
250    while ((maxTries > 0) && matcher.find(index)) {
251      int start = matcher.start();
252      CharSequence candidate = text.subSequence(start, matcher.end());
253
254      // Check for extra numbers at the end.
255      // TODO: This is the place to start when trying to support extraction of multiple phone number
256      // from split notations (+41 79 123 45 67 / 68).
257      candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate);
258
259      PhoneNumberMatch match = extractMatch(candidate, start);
260      if (match != null) {
261        return match;
262      }
263
264      index = start + candidate.length();
265      maxTries--;
266    }
267
268    return null;
269  }
270
271  /**
272   * Trims away any characters after the first match of {@code pattern} in {@code candidate},
273   * returning the trimmed version.
274   */
275  private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) {
276    Matcher trailingCharsMatcher = pattern.matcher(candidate);
277    if (trailingCharsMatcher.find()) {
278      candidate = candidate.subSequence(0, trailingCharsMatcher.start());
279    }
280    return candidate;
281  }
282
283  /**
284   * Helper method to determine if a character is a Latin-script letter or not. For our purposes,
285   * combining marks should also return true since we assume they have been added to a preceding
286   * Latin character.
287   */
288  // @VisibleForTesting
289  static boolean isLatinLetter(char letter) {
290    // Combining marks are a subset of non-spacing-mark.
291    if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
292      return false;
293    }
294    UnicodeBlock block = UnicodeBlock.of(letter);
295    return block.equals(UnicodeBlock.BASIC_LATIN) ||
296        block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) ||
297        block.equals(UnicodeBlock.LATIN_EXTENDED_A) ||
298        block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) ||
299        block.equals(UnicodeBlock.LATIN_EXTENDED_B) ||
300        block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
301  }
302
303  private static boolean isInvalidPunctuationSymbol(char character) {
304    return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL;
305  }
306
307  /**
308   * Attempts to extract a match from a {@code candidate} character sequence.
309   *
310   * @param candidate  the candidate text that might contain a phone number
311   * @param offset  the offset of {@code candidate} within {@link #text}
312   * @return  the match found, null if none can be found
313   */
314  private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) {
315    // Skip a match that is more likely to be a date.
316    if (SLASH_SEPARATED_DATES.matcher(candidate).find()) {
317      return null;
318    }
319
320    // Skip potential time-stamps.
321    if (TIME_STAMPS.matcher(candidate).find()) {
322      String followingText = text.toString().substring(offset + candidate.length());
323      if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) {
324        return null;
325      }
326    }
327
328    // Try to come up with a valid match given the entire candidate.
329    String rawString = candidate.toString();
330    PhoneNumberMatch match = parseAndVerify(rawString, offset);
331    if (match != null) {
332      return match;
333    }
334
335    // If that failed, try to find an "inner match" - there might be a phone number within this
336    // candidate.
337    return extractInnerMatch(rawString, offset);
338  }
339
340  /**
341   * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a
342   * match.
343   *
344   * @param candidate  the candidate text that might contain a phone number
345   * @param offset  the current offset of {@code candidate} within {@link #text}
346   * @return  the match found, null if none can be found
347   */
348  private PhoneNumberMatch extractInnerMatch(String candidate, int offset) {
349    for (Pattern possibleInnerMatch : INNER_MATCHES) {
350      Matcher groupMatcher = possibleInnerMatch.matcher(candidate);
351      boolean isFirstMatch = true;
352      while (groupMatcher.find() && maxTries > 0) {
353        if (isFirstMatch) {
354          // We should handle any group before this one too.
355          CharSequence group = trimAfterFirstMatch(
356              PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
357              candidate.substring(0, groupMatcher.start()));
358          PhoneNumberMatch match = parseAndVerify(group.toString(), offset);
359          if (match != null) {
360            return match;
361          }
362          maxTries--;
363          isFirstMatch = false;
364        }
365        CharSequence group = trimAfterFirstMatch(
366            PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, groupMatcher.group(1));
367        PhoneNumberMatch match = parseAndVerify(group.toString(), offset + groupMatcher.start(1));
368        if (match != null) {
369          return match;
370        }
371        maxTries--;
372      }
373    }
374    return null;
375  }
376
377  /**
378   * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and
379   * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a
380   * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null.
381   *
382   * @param candidate  the candidate match
383   * @param offset  the offset of {@code candidate} within {@link #text}
384   * @return  the parsed and validated phone number match, or null
385   */
386  private PhoneNumberMatch parseAndVerify(String candidate, int offset) {
387    try {
388      // Check the candidate doesn't contain any formatting which would indicate that it really
389      // isn't a phone number.
390      if (!MATCHING_BRACKETS.matcher(candidate).matches() || PUB_PAGES.matcher(candidate).find()) {
391        return null;
392      }
393
394      // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
395      // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
396      if (leniency.compareTo(Leniency.VALID) >= 0) {
397        // If the candidate is not at the start of the text, and does not start with phone-number
398        // punctuation, check the previous character.
399        if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) {
400          char previousChar = text.charAt(offset - 1);
401          // We return null if it is a latin letter or an invalid punctuation symbol.
402          if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
403            return null;
404          }
405        }
406        int lastCharIndex = offset + candidate.length();
407        if (lastCharIndex < text.length()) {
408          char nextChar = text.charAt(lastCharIndex);
409          if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
410            return null;
411          }
412        }
413      }
414
415      PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);
416
417      // Check Israel * numbers: these are a special case in that they are four-digit numbers that
418      // our library supports, but they can only be dialled with a leading *. Since we don't
419      // actually store or detect the * in our phone number library, this means in practice we
420      // detect most four digit numbers as being valid for Israel. We are considering moving these
421      // numbers to ShortNumberInfo instead, in which case this problem would go away, but in the
422      // meantime we want to restrict the false matches so we only allow these numbers if they are
423      // preceded by a star. We enforce this for all leniency levels even though these numbers are
424      // technically accepted by isPossibleNumber and isValidNumber since we consider it to be a
425      // deficiency in those methods that they accept these numbers without the *.
426      // TODO: Remove this or make it significantly less hacky once we've decided how to
427      // handle these short codes going forward in ShortNumberInfo. We could use the formatting
428      // rules for instance, but that would be slower.
429      if (phoneUtil.getRegionCodeForCountryCode(number.getCountryCode()).equals("IL") &&
430          phoneUtil.getNationalSignificantNumber(number).length() == 4 &&
431          (offset == 0 || (offset > 0 && text.charAt(offset - 1) != '*'))) {
432        // No match.
433        return null;
434      }
435
436      if (leniency.verify(number, candidate, phoneUtil)) {
437        // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
438        // values parsed. TODO: stop clearing all values here and switch all users over
439        // to using rawInput() rather than the rawString() of PhoneNumberMatch.
440        number.clearCountryCodeSource();
441        number.clearRawInput();
442        number.clearPreferredDomesticCarrierCode();
443        return new PhoneNumberMatch(offset, candidate, number);
444      }
445    } catch (NumberParseException e) {
446      // ignore and continue
447    }
448    return null;
449  }
450
  /**
   * Small helper interface such that the number groups can be checked according to different
   * criteria, both for our default way of performing formatting and for any alternate formats we
   * may want to check.
   */
  interface NumberGroupingChecker {
    /**
     * Returns true if the groups of digits found in our candidate phone number match our
     * expectations.
     *
     * @param util  the phone number util made available to the check
     * @param number  the original number we found when parsing
     * @param normalizedCandidate  the candidate number, normalized to only contain ASCII digits,
     *     but with non-digits (spaces etc) retained
     * @param expectedNumberGroups  the groups of digits that we would expect to see if we
     *     formatted this number
     * @return  true if the grouping of the candidate is acceptable, false otherwise
     */
    boolean checkGroups(PhoneNumberUtil util, PhoneNumber number,
                        StringBuilder normalizedCandidate, String[] expectedNumberGroups);
  }
470
471  static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util,
472                                              PhoneNumber number,
473                                              StringBuilder normalizedCandidate,
474                                              String[] formattedNumberGroups) {
475    int fromIndex = 0;
476    if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
477      // First skip the country code if the normalized candidate contained it.
478      String countryCode = Integer.toString(number.getCountryCode());
479      fromIndex = normalizedCandidate.indexOf(countryCode) + countryCode.length();
480    }
481    // Check each group of consecutive digits are not broken into separate groupings in the
482    // {@code normalizedCandidate} string.
483    for (int i = 0; i < formattedNumberGroups.length; i++) {
484      // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex}
485      // doesn't contain the consecutive digits in formattedNumberGroups[i].
486      fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex);
487      if (fromIndex < 0) {
488        return false;
489      }
490      // Moves {@code fromIndex} forward.
491      fromIndex += formattedNumberGroups[i].length();
492      if (i == 0 && fromIndex < normalizedCandidate.length()) {
493        // We are at the position right after the NDC. We get the region used for formatting
494        // information based on the country code in the phone number, rather than the number itself,
495        // as we do not need to distinguish between different countries with the same country
496        // calling code and this is faster.
497        String region = util.getRegionCodeForCountryCode(number.getCountryCode());
498        if (util.getNddPrefixForRegion(region, true) != null &&
499            Character.isDigit(normalizedCandidate.charAt(fromIndex))) {
500          // This means there is no formatting symbol after the NDC. In this case, we only
501          // accept the number if there is no formatting symbol at all in the number, except
502          // for extensions. This is only important for countries with national prefixes.
503          String nationalSignificantNumber = util.getNationalSignificantNumber(number);
504          return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length())
505              .startsWith(nationalSignificantNumber);
506        }
507      }
508    }
509    // The check here makes sure that we haven't mistakenly already used the extension to
510    // match the last group of the subscriber number. Note the extension cannot have
511    // formatting in-between digits.
512    return normalizedCandidate.substring(fromIndex).contains(number.getExtension());
513  }
514
515  static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util,
516                                                  PhoneNumber number,
517                                                  StringBuilder normalizedCandidate,
518                                                  String[] formattedNumberGroups) {
519    String[] candidateGroups =
520        PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString());
521    // Set this to the last group, skipping it if the number has an extension.
522    int candidateNumberGroupIndex =
523        number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1;
524    // First we check if the national significant number is formatted as a block.
525    // We use contains and not equals, since the national significant number may be present with
526    // a prefix such as a national number prefix, or the country code itself.
527    if (candidateGroups.length == 1 ||
528        candidateGroups[candidateNumberGroupIndex].contains(
529            util.getNationalSignificantNumber(number))) {
530      return true;
531    }
532    // Starting from the end, go through in reverse, excluding the first group, and check the
533    // candidate and number groups are the same.
534    for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1);
535         formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0;
536         formattedNumberGroupIndex--, candidateNumberGroupIndex--) {
537      if (!candidateGroups[candidateNumberGroupIndex].equals(
538          formattedNumberGroups[formattedNumberGroupIndex])) {
539        return false;
540      }
541    }
542    // Now check the first group. There may be a national prefix at the start, so we only check
543    // that the candidate group ends with the formatted number group.
544    return (candidateNumberGroupIndex >= 0 &&
545            candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0]));
546  }
547
548  /**
549   * Helper method to get the national-number part of a number, formatted without any national
550   * prefix, and return it as a set of digit blocks that would be formatted together.
551   */
552  private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number,
553                                                  NumberFormat formattingPattern) {
554    if (formattingPattern == null) {
555      // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits.
556      String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
557      // We remove the extension part from the formatted string before splitting it into different
558      // groups.
559      int endIndex = rfc3966Format.indexOf(';');
560      if (endIndex < 0) {
561        endIndex = rfc3966Format.length();
562      }
563      // The country-code will have a '-' following it.
564      int startIndex = rfc3966Format.indexOf('-') + 1;
565      return rfc3966Format.substring(startIndex, endIndex).split("-");
566    } else {
567      // We format the NSN only, and split that according to the separator.
568      String nationalSignificantNumber = util.getNationalSignificantNumber(number);
569      return util.formatNsnUsingPattern(nationalSignificantNumber,
570                                        formattingPattern, PhoneNumberFormat.RFC3966).split("-");
571    }
572  }
573
574  static boolean checkNumberGroupingIsValid(
575      PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) {
576    // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions)
577    // and optimise if necessary.
578    StringBuilder normalizedCandidate =
579        PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */);
580    String[] formattedNumberGroups = getNationalNumberGroups(util, number, null);
581    if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
582      return true;
583    }
584    // If this didn't pass, see if there are any alternate formats, and try them instead.
585    PhoneMetadata alternateFormats =
586        MetadataManager.getAlternateFormatsForCountry(number.getCountryCode());
587    if (alternateFormats != null) {
588      for (NumberFormat alternateFormat : alternateFormats.numberFormats()) {
589        formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
590        if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
591          return true;
592        }
593      }
594    }
595    return false;
596  }
597
598  static boolean containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate) {
599    int firstSlashInBodyIndex = candidate.indexOf('/');
600    if (firstSlashInBodyIndex < 0) {
601      // No slashes, this is okay.
602      return false;
603    }
604    // Now look for a second one.
605    int secondSlashInBodyIndex = candidate.indexOf('/', firstSlashInBodyIndex + 1);
606    if (secondSlashInBodyIndex < 0) {
607      // Only one slash, this is okay.
608      return false;
609    }
610
611    // If the first slash is after the country calling code, this is permitted.
612    boolean candidateHasCountryCode =
613        (number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN ||
614         number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN);
615    if (candidateHasCountryCode &&
616        PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex))
617            .equals(Integer.toString(number.getCountryCode()))) {
618      // Any more slashes and this is illegal.
619      return candidate.substring(secondSlashInBodyIndex + 1).contains("/");
620    }
621    return true;
622  }
623
624  static boolean containsOnlyValidXChars(
625      PhoneNumber number, String candidate, PhoneNumberUtil util) {
626    // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the
627    // national significant number or (2) an extension sign, in which case they always precede the
628    // extension number. We assume a carrier code is more than 1 digit, so the first case has to
629    // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x'
630    // or 'X'. We ignore the character if it appears as the last character of the string.
631    for (int index = 0; index < candidate.length() - 1; index++) {
632      char charAtIndex = candidate.charAt(index);
633      if (charAtIndex == 'x' || charAtIndex == 'X') {
634        char charAtNextIndex = candidate.charAt(index + 1);
635        if (charAtNextIndex == 'x' || charAtNextIndex == 'X') {
636          // This is the carrier code case, in which the 'X's always precede the national
637          // significant number.
638          index++;
639          if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) {
640            return false;
641          }
642        // This is the extension sign case, in which the 'x' or 'X' should always precede the
643        // extension number.
644        } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals(
645            number.getExtension())) {
646          return false;
647        }
648      }
649    }
650    return true;
651  }
652
653  static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) {
654    // First, check how we deduced the country code. If it was written in international format, then
655    // the national prefix is not required.
656    if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
657      return true;
658    }
659    String phoneNumberRegion =
660        util.getRegionCodeForCountryCode(number.getCountryCode());
661    PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion);
662    if (metadata == null) {
663      return true;
664    }
665    // Check if a national prefix should be present when formatting this number.
666    String nationalNumber = util.getNationalSignificantNumber(number);
667    NumberFormat formatRule =
668        util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber);
669    // To do this, we check that a national prefix formatting rule was present and that it wasn't
670    // just the first-group symbol ($1) with punctuation.
671    if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) {
672      if (formatRule.isNationalPrefixOptionalWhenFormatting()) {
673        // The national-prefix is optional in these cases, so we don't need to check if it was
674        // present.
675        return true;
676      }
677      if (PhoneNumberUtil.formattingRuleHasFirstGroupOnly(
678          formatRule.getNationalPrefixFormattingRule())) {
679        // National Prefix not needed for this number.
680        return true;
681      }
682      // Normalize the remainder.
683      String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput());
684      StringBuilder rawInput = new StringBuilder(rawInputCopy);
685      // Check if we found a national prefix and/or carrier code at the start of the raw input, and
686      // return the result.
687      return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null);
688    }
689    return true;
690  }
691
692  @Override
693  public boolean hasNext() {
694    if (state == State.NOT_READY) {
695      lastMatch = find(searchIndex);
696      if (lastMatch == null) {
697        state = State.DONE;
698      } else {
699        searchIndex = lastMatch.end();
700        state = State.READY;
701      }
702    }
703    return state == State.READY;
704  }
705
706  @Override
707  public PhoneNumberMatch next() {
708    // Check the state and find the next match as a side-effect if necessary.
709    if (!hasNext()) {
710      throw new NoSuchElementException();
711    }
712
713    // Don't retain that memory any longer than necessary.
714    PhoneNumberMatch result = lastMatch;
715    lastMatch = null;
716    state = State.NOT_READY;
717    return result;
718  }
719
  /**
   * Always throws {@link UnsupportedOperationException} as removal is not supported.
   *
   * @throws UnsupportedOperationException  on every call
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }
727}
728