1// Copyright (C) 2009 The Libphonenumber Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Author: Shaopeng Jia
16// Open-sourced by: Philippe Liard
17
18#include "phonenumbers/phonenumberutil.h"
19
20#include <algorithm>
21#include <cctype>
22#include <cstring>
23#include <iterator>
24#include <map>
25#include <utility>
26#include <vector>
27
28#include <google/protobuf/message_lite.h>
29#include <unicode/uchar.h>
30#include <unicode/utf8.h>
31
32#include "phonenumbers/asyoutypeformatter.h"
33#include "phonenumbers/base/basictypes.h"
34#include "phonenumbers/base/logging.h"
35#include "phonenumbers/base/memory/singleton.h"
36#include "phonenumbers/default_logger.h"
37#include "phonenumbers/encoding_utils.h"
38#include "phonenumbers/metadata.h"
39#include "phonenumbers/normalize_utf8.h"
40#include "phonenumbers/phonemetadata.pb.h"
41#include "phonenumbers/phonenumber.h"
42#include "phonenumbers/phonenumber.pb.h"
43#include "phonenumbers/regexp_adapter.h"
44#include "phonenumbers/regexp_cache.h"
45#include "phonenumbers/regexp_factory.h"
46#include "phonenumbers/region_code.h"
47#include "phonenumbers/stl_util.h"
48#include "phonenumbers/stringutil.h"
49#include "phonenumbers/utf/unicodetext.h"
50#include "phonenumbers/utf/utf.h"
51
52namespace i18n {
53namespace phonenumbers {
54
55using std::make_pair;
56using std::sort;
57
58using google::protobuf::RepeatedPtrField;
59
60// static
61const char PhoneNumberUtil::kPlusChars[] = "+\xEF\xBC\x8B";  /* "++" */
62// To find out the unicode code-point of the characters below in vim, highlight
63// the character and type 'ga'. Note that the - is used to express ranges of
64// full-width punctuation below, as well as being present in the expression
65// itself. In emacs, you can use M-x unicode-what to query information about the
66// unicode character.
67// static
68const char PhoneNumberUtil::kValidPunctuation[] =
69    /* "-x‐-―−ー--/  ­<U+200B><U+2060> ()()[].\\[\\]/~⁓∼" */
70    "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC"
71    "\x8F \xC2\xA0\xC2\xAD\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88"
72    "\xEF\xBC\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC";
73
74// static
75const char PhoneNumberUtil::kCaptureUpToSecondNumberStart[] = "(.*)[\\\\/] *x";
76
77// static
78const char PhoneNumberUtil::kRegionCodeForNonGeoEntity[] = "001";
79
80namespace {
81
// Digit that has to be placed in front of a Colombian landline number when it
// is dialed from a mobile phone within Colombia.
const char kColombiaMobileToFixedLinePrefix[] = "3";

// kPlusSign stands for the international prefix.
const char kPlusSign[] = "+";

// Star symbol; it is appended to the valid punctuation when the viable phone
// number pattern is assembled.
const char kStarSign[] = "*";

// Building blocks of the RFC 3966 ("tel:" URI) representation.
const char kRfc3966ExtnPrefix[] = ";ext=";
const char kRfc3966Prefix[] = "tel:";
const char kRfc3966PhoneContext[] = ";phone-context=";
const char kRfc3966IsdnSubaddress[] = ";isub=";

// Regular-expression class matching any Unicode decimal digit.
const char kDigits[] = "\\p{Nd}";
// Alpha characters are accepted in phone numbers, but only ASCII ones. Just
// the lower-case range is stored because the regular expressions that use it
// are case-insensitive.
const char kValidAlpha[] = "a-z";

// Text placed between the formatted national number and the extension when no
// region-specific preference overrides it. To render extensions as
// " extn: 3456", for example, this would have to be " extn: ".
const char kDefaultExtnPrefix[] = " ext. ";

// Single-character symbols that may introduce an extension, each in its ASCII
// and full-width form.
const char kSingleExtnSymbolsForMatching[] =
    "x"
    "\xEF\xBD\x98"   // U+FF58 FULLWIDTH LATIN SMALL LETTER X
    "#"
    "\xEF\xBC\x83"   // U+FF03 FULLWIDTH NUMBER SIGN
    "~"
    "\xEF\xBD\x9E";  // U+FF5E FULLWIDTH TILDE
111
112bool LoadCompiledInMetadata(PhoneMetadataCollection* metadata) {
113  if (!metadata->ParseFromArray(metadata_get(), metadata_size())) {
114    LOG(ERROR) << "Could not parse binary data.";
115    return false;
116  }
117  return true;
118}
119
120// Returns a pointer to the description inside the metadata of the appropriate
121// type.
122const PhoneNumberDesc* GetNumberDescByType(
123    const PhoneMetadata& metadata,
124    PhoneNumberUtil::PhoneNumberType type) {
125  switch (type) {
126    case PhoneNumberUtil::PREMIUM_RATE:
127      return &metadata.premium_rate();
128    case PhoneNumberUtil::TOLL_FREE:
129      return &metadata.toll_free();
130    case PhoneNumberUtil::MOBILE:
131      return &metadata.mobile();
132    case PhoneNumberUtil::FIXED_LINE:
133    case PhoneNumberUtil::FIXED_LINE_OR_MOBILE:
134      return &metadata.fixed_line();
135    case PhoneNumberUtil::SHARED_COST:
136      return &metadata.shared_cost();
137    case PhoneNumberUtil::VOIP:
138      return &metadata.voip();
139    case PhoneNumberUtil::PERSONAL_NUMBER:
140      return &metadata.personal_number();
141    case PhoneNumberUtil::PAGER:
142      return &metadata.pager();
143    case PhoneNumberUtil::UAN:
144      return &metadata.uan();
145    case PhoneNumberUtil::VOICEMAIL:
146      return &metadata.voicemail();
147    default:
148      return &metadata.general_desc();
149  }
150}
151
152// A helper function that is used by Format and FormatByPattern.
153void PrefixNumberWithCountryCallingCode(
154    int country_calling_code,
155    PhoneNumberUtil::PhoneNumberFormat number_format,
156    string* formatted_number) {
157  switch (number_format) {
158    case PhoneNumberUtil::E164:
159      formatted_number->insert(0, StrCat(kPlusSign, country_calling_code));
160      return;
161    case PhoneNumberUtil::INTERNATIONAL:
162      formatted_number->insert(0, StrCat(kPlusSign, country_calling_code, " "));
163      return;
164    case PhoneNumberUtil::RFC3966:
165      formatted_number->insert(0, StrCat(kRfc3966Prefix, kPlusSign,
166                                         country_calling_code, "-"));
167      return;
168    case PhoneNumberUtil::NATIONAL:
169    default:
170      // Do nothing.
171      return;
172  }
173}
174
175// Returns true when one national number is the suffix of the other or both are
176// the same.
177bool IsNationalNumberSuffixOfTheOther(const PhoneNumber& first_number,
178                                      const PhoneNumber& second_number) {
179  const string& first_number_national_number =
180    SimpleItoa(static_cast<uint64>(first_number.national_number()));
181  const string& second_number_national_number =
182    SimpleItoa(static_cast<uint64>(second_number.national_number()));
183  // Note that HasSuffixString returns true if the numbers are equal.
184  return HasSuffixString(first_number_national_number,
185                         second_number_national_number) ||
186         HasSuffixString(second_number_national_number,
187                         first_number_national_number);
188}
189
190bool IsNumberMatchingDesc(const string& national_number,
191                          const PhoneNumberDesc& number_desc,
192                          RegExpCache* regexp_cache) {
193  return regexp_cache->GetRegExp(number_desc.possible_number_pattern())
194             .FullMatch(national_number) &&
195         regexp_cache->GetRegExp(number_desc.national_number_pattern())
196             .FullMatch(national_number);
197}
198
199PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper(
200    const string& national_number, const PhoneMetadata& metadata,
201    RegExpCache* regexp_cache) {
202  const PhoneNumberDesc& general_desc = metadata.general_desc();
203  if (!general_desc.has_national_number_pattern() ||
204      !IsNumberMatchingDesc(national_number, general_desc, regexp_cache)) {
205    VLOG(4) << "Number type unknown - doesn't match general national number"
206            << " pattern.";
207    return PhoneNumberUtil::UNKNOWN;
208  }
209  if (IsNumberMatchingDesc(national_number, metadata.premium_rate(),
210                           regexp_cache)) {
211    VLOG(4) << "Number is a premium number.";
212    return PhoneNumberUtil::PREMIUM_RATE;
213  }
214  if (IsNumberMatchingDesc(national_number, metadata.toll_free(),
215                           regexp_cache)) {
216    VLOG(4) << "Number is a toll-free number.";
217    return PhoneNumberUtil::TOLL_FREE;
218  }
219  if (IsNumberMatchingDesc(national_number, metadata.shared_cost(),
220                           regexp_cache)) {
221    VLOG(4) << "Number is a shared cost number.";
222    return PhoneNumberUtil::SHARED_COST;
223  }
224  if (IsNumberMatchingDesc(national_number, metadata.voip(), regexp_cache)) {
225    VLOG(4) << "Number is a VOIP (Voice over IP) number.";
226    return PhoneNumberUtil::VOIP;
227  }
228  if (IsNumberMatchingDesc(national_number, metadata.personal_number(),
229                           regexp_cache)) {
230    VLOG(4) << "Number is a personal number.";
231    return PhoneNumberUtil::PERSONAL_NUMBER;
232  }
233  if (IsNumberMatchingDesc(national_number, metadata.pager(), regexp_cache)) {
234    VLOG(4) << "Number is a pager number.";
235    return PhoneNumberUtil::PAGER;
236  }
237  if (IsNumberMatchingDesc(national_number, metadata.uan(), regexp_cache)) {
238    VLOG(4) << "Number is a UAN.";
239    return PhoneNumberUtil::UAN;
240  }
241  if (IsNumberMatchingDesc(national_number, metadata.voicemail(),
242                           regexp_cache)) {
243    VLOG(4) << "Number is a voicemail number.";
244    return PhoneNumberUtil::VOICEMAIL;
245  }
246
247  bool is_fixed_line =
248      IsNumberMatchingDesc(national_number, metadata.fixed_line(),
249                           regexp_cache);
250  if (is_fixed_line) {
251    if (metadata.same_mobile_and_fixed_line_pattern()) {
252      VLOG(4) << "Fixed-line and mobile patterns equal, number is fixed-line"
253              << " or mobile";
254      return PhoneNumberUtil::FIXED_LINE_OR_MOBILE;
255    } else if (IsNumberMatchingDesc(national_number, metadata.mobile(),
256                                    regexp_cache)) {
257      VLOG(4) << "Fixed-line and mobile patterns differ, but number is "
258              << "still fixed-line or mobile";
259      return PhoneNumberUtil::FIXED_LINE_OR_MOBILE;
260    }
261    VLOG(4) << "Number is a fixed line number.";
262    return PhoneNumberUtil::FIXED_LINE;
263  }
264  // Otherwise, test to see if the number is mobile. Only do this if certain
265  // that the patterns for mobile and fixed line aren't the same.
266  if (!metadata.same_mobile_and_fixed_line_pattern() &&
267      IsNumberMatchingDesc(national_number, metadata.mobile(), regexp_cache)) {
268    VLOG(4) << "Number is a mobile number.";
269    return PhoneNumberUtil::MOBILE;
270  }
271  VLOG(4) << "Number type unknown - doesn\'t match any specific number type"
272          << " pattern.";
273  return PhoneNumberUtil::UNKNOWN;
274}
275
276char32 ToUnicodeCodepoint(const char* unicode_char) {
277  char32 codepoint;
278  EncodingUtils::DecodeUTF8Char(unicode_char, &codepoint);
279  return codepoint;
280}
281
282// Helper initialiser method to create the regular-expression pattern to match
283// extensions, allowing the one-codepoint extension symbols provided by
284// single_extn_symbols.
285// Note that there are currently three capturing groups for the extension itself
286// - if this number is changed, MaybeStripExtension needs to be updated.
287string CreateExtnPattern(const string& single_extn_symbols) {
288  static const string capturing_extn_digits = StrCat("([", kDigits, "]{1,7})");
289  // The first regular expression covers RFC 3966 format, where the extension is
290  // added using ";ext=". The second more generic one starts with optional white
291  // space and ends with an optional full stop (.), followed by zero or more
292  // spaces/tabs and then the numbers themselves. The third one covers the
293  // special case of American numbers where the extension is written with a hash
294  // at the end, such as "- 503#".
295  // Note that the only capturing groups should be around the digits that you
296  // want to capture as part of the extension, or else parsing will fail!
297  // Canonical-equivalence doesn't seem to be an option with RE2, so we allow
298  // two options for representing the ó - the character itself, and one in the
299  // unicode decomposed form with the combining acute accent.
300  return (StrCat(
301      kRfc3966ExtnPrefix, capturing_extn_digits, "|"
302       /* "[  \\t,]*(?:e?xt(?:ensi(?:ó?|ó))?n?|e?xtn?|single_extn_symbols|"
303          "int|int|anexo)"
304          "[:\\..]?[  \\t,-]*", capturing_extn_digits, "#?|" */
305      "[ \xC2\xA0\\t,]*(?:e?xt(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|"
306      "(?:\xEF\xBD\x85)?\xEF\xBD\x98\xEF\xBD\x94(?:\xEF\xBD\x8E)?|"
307      "[", single_extn_symbols, "]|int|"
308      "\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)"
309      "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits,
310      "#?|[- ]+([", kDigits, "]{1,5})#"));
311}
312
313// Normalizes a string of characters representing a phone number by replacing
314// all characters found in the accompanying map with the values therein, and
315// stripping all other characters if remove_non_matches is true.
316// Parameters:
317// number - a pointer to a string of characters representing a phone number to
318//   be normalized.
319// normalization_replacements - a mapping of characters to what they should be
320//   replaced by in the normalized version of the phone number
321// remove_non_matches - indicates whether characters that are not able to be
322//   replaced should be stripped from the number. If this is false, they will be
323//   left unchanged in the number.
324void NormalizeHelper(const map<char32, char>& normalization_replacements,
325                     bool remove_non_matches,
326                     string* number) {
327  DCHECK(number);
328  UnicodeText number_as_unicode;
329  number_as_unicode.PointToUTF8(number->data(), number->size());
330  string normalized_number;
331  char unicode_char[5];
332  for (UnicodeText::const_iterator it = number_as_unicode.begin();
333       it != number_as_unicode.end();
334       ++it) {
335    map<char32, char>::const_iterator found_glyph_pair =
336        normalization_replacements.find(*it);
337    if (found_glyph_pair != normalization_replacements.end()) {
338      normalized_number.push_back(found_glyph_pair->second);
339    } else if (!remove_non_matches) {
340      // Find out how long this unicode char is so we can append it all.
341      int char_len = it.get_utf8(unicode_char);
342      normalized_number.append(unicode_char, char_len);
343    }
344    // If neither of the above are true, we remove this character.
345  }
346  number->assign(normalized_number);
347}
348
349PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern(
350    const RegExp& number_pattern, const string& number) {
351  string extracted_number;
352  if (number_pattern.FullMatch(number, &extracted_number)) {
353    return PhoneNumberUtil::IS_POSSIBLE;
354  }
355  if (number_pattern.PartialMatch(number, &extracted_number)) {
356    return PhoneNumberUtil::TOO_LONG;
357  } else {
358    return PhoneNumberUtil::TOO_SHORT;
359  }
360}
361
362}  // namespace
363
364void PhoneNumberUtil::SetLogger(Logger* logger) {
365  logger_.reset(logger);
366  Logger::set_logger_impl(logger_.get());
367}
368
369class PhoneNumberRegExpsAndMappings {
370 private:
371  void InitializeMapsAndSets() {
372    diallable_char_mappings_.insert(make_pair('+', '+'));
373    diallable_char_mappings_.insert(make_pair('*', '*'));
374    // Here we insert all punctuation symbols that we wish to respect when
375    // formatting alpha numbers, as they show the intended number groupings.
376    all_plus_number_grouping_symbols_.insert(
377        make_pair(ToUnicodeCodepoint("-"), '-'));
378    all_plus_number_grouping_symbols_.insert(
379        make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D" /* "-" */), '-'));
380    all_plus_number_grouping_symbols_.insert(
381        make_pair(ToUnicodeCodepoint("\xE2\x80\x90" /* "‐" */), '-'));
382    all_plus_number_grouping_symbols_.insert(
383        make_pair(ToUnicodeCodepoint("\xE2\x80\x91" /* "‑" */), '-'));
384    all_plus_number_grouping_symbols_.insert(
385        make_pair(ToUnicodeCodepoint("\xE2\x80\x92" /* "‒" */), '-'));
386    all_plus_number_grouping_symbols_.insert(
387        make_pair(ToUnicodeCodepoint("\xE2\x80\x93" /* "–" */), '-'));
388    all_plus_number_grouping_symbols_.insert(
389        make_pair(ToUnicodeCodepoint("\xE2\x80\x94" /* "—" */), '-'));
390    all_plus_number_grouping_symbols_.insert(
391        make_pair(ToUnicodeCodepoint("\xE2\x80\x95" /* "―" */), '-'));
392    all_plus_number_grouping_symbols_.insert(
393        make_pair(ToUnicodeCodepoint("\xE2\x88\x92" /* "−" */), '-'));
394    all_plus_number_grouping_symbols_.insert(
395        make_pair(ToUnicodeCodepoint("/"), '/'));
396    all_plus_number_grouping_symbols_.insert(
397        make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F" /* "/" */), '/'));
398    all_plus_number_grouping_symbols_.insert(
399        make_pair(ToUnicodeCodepoint(" "), ' '));
400    all_plus_number_grouping_symbols_.insert(
401        make_pair(ToUnicodeCodepoint("\xE3\x80\x80" /* " " */), ' '));
402    all_plus_number_grouping_symbols_.insert(
403        make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' '));
404    all_plus_number_grouping_symbols_.insert(
405        make_pair(ToUnicodeCodepoint("."), '.'));
406    all_plus_number_grouping_symbols_.insert(
407        make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E" /* "." */), '.'));
408    // Only the upper-case letters are added here - the lower-case versions are
409    // added programmatically.
410    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("A"), '2'));
411    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("B"), '2'));
412    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("C"), '2'));
413    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("D"), '3'));
414    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("E"), '3'));
415    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("F"), '3'));
416    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("G"), '4'));
417    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("H"), '4'));
418    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("I"), '4'));
419    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("J"), '5'));
420    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("K"), '5'));
421    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("L"), '5'));
422    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("M"), '6'));
423    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("N"), '6'));
424    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("O"), '6'));
425    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("P"), '7'));
426    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("Q"), '7'));
427    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("R"), '7'));
428    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("S"), '7'));
429    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("T"), '8'));
430    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("U"), '8'));
431    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("V"), '8'));
432    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("W"), '9'));
433    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("X"), '9'));
434    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("Y"), '9'));
435    alpha_mappings_.insert(make_pair(ToUnicodeCodepoint("Z"), '9'));
436    map<char32, char> lower_case_mappings;
437    map<char32, char> alpha_letters;
438    for (map<char32, char>::const_iterator it = alpha_mappings_.begin();
439         it != alpha_mappings_.end();
440         ++it) {
441      // Convert all the upper-case ASCII letters to lower-case.
442      if (it->first < 128) {
443        char letter_as_upper = static_cast<char>(it->first);
444        char32 letter_as_lower = static_cast<char32>(tolower(letter_as_upper));
445        lower_case_mappings.insert(make_pair(letter_as_lower, it->second));
446        // Add the letters in both variants to the alpha_letters map. This just
447        // pairs each letter with its upper-case representation so that it can
448        // be retained when normalising alpha numbers.
449        alpha_letters.insert(make_pair(letter_as_lower, letter_as_upper));
450        alpha_letters.insert(make_pair(it->first, letter_as_upper));
451      }
452    }
453    // In the Java version we don't insert the lower-case mappings in the map,
454    // because we convert to upper case on the fly. Doing this here would
455    // involve pulling in all of ICU, which we don't want to do if we don't have
456    // to.
457    alpha_mappings_.insert(lower_case_mappings.begin(),
458                           lower_case_mappings.end());
459    alpha_phone_mappings_.insert(alpha_mappings_.begin(),
460                                 alpha_mappings_.end());
461    all_plus_number_grouping_symbols_.insert(alpha_letters.begin(),
462                                             alpha_letters.end());
463    // Add the ASCII digits so that they don't get deleted by NormalizeHelper().
464    for (char c = '0'; c <= '9'; ++c) {
465      diallable_char_mappings_.insert(make_pair(c, c));
466      alpha_phone_mappings_.insert(make_pair(c, c));
467      all_plus_number_grouping_symbols_.insert(make_pair(c, c));
468    }
469
470    mobile_token_mappings_.insert(make_pair(52, '1'));
471    mobile_token_mappings_.insert(make_pair(54, '9'));
472  }
473
474  // Small string helpers since StrCat has a maximum number of arguments. These
475  // are both used to build valid_phone_number_.
476  const string punctuation_and_star_sign_;
477  const string min_length_phone_number_pattern_;
478
479  // Regular expression of viable phone numbers. This is location independent.
480  // Checks we have at least three leading digits, and only valid punctuation,
481  // alpha characters and digits in the phone number. Does not include extension
482  // data. The symbol 'x' is allowed here as valid punctuation since it is often
483  // used as a placeholder for carrier codes, for example in Brazilian phone
484  // numbers. We also allow multiple plus-signs at the start.
485  // Corresponds to the following:
486  // [digits]{minLengthNsn}|
487  // plus_sign*(([punctuation]|[star])*[digits]){3,}
488  // ([punctuation]|[star]|[digits]|[alpha])*
489  //
490  // The first reg-ex is to allow short numbers (two digits long) to be parsed
491  // if they are entered as "15" etc, but only if there is no punctuation in
492  // them. The second expression restricts the number of digits to three or
493  // more, but then allows them to be in international form, and to have
494  // alpha-characters and punctuation.
495  const string valid_phone_number_;
496
497  // Regexp of all possible ways to write extensions, for use when parsing. This
498  // will be run as a case-insensitive regexp match. Wide character versions are
499  // also provided after each ASCII version.
500  // For parsing, we are slightly more lenient in our interpretation than for
501  // matching. Here we allow a "comma" as a possible extension indicator. When
502  // matching, this is hardly ever used to indicate this.
503  const string extn_patterns_for_parsing_;
504
505 public:
506  scoped_ptr<const AbstractRegExpFactory> regexp_factory_;
507  scoped_ptr<RegExpCache> regexp_cache_;
508
509  // A map that contains characters that are essential when dialling. That means
510  // any of the characters in this map must not be removed from a number when
511  // dialing, otherwise the call will not reach the intended destination.
512  map<char32, char> diallable_char_mappings_;
513  // These mappings map a character (key) to a specific digit that should
514  // replace it for normalization purposes.
515  map<char32, char> alpha_mappings_;
516  // For performance reasons, store a map of combining alpha_mappings with ASCII
517  // digits.
518  map<char32, char> alpha_phone_mappings_;
519
520  // Separate map of all symbols that we wish to retain when formatting alpha
521  // numbers. This includes digits, ascii letters and number grouping symbols
522  // such as "-" and " ".
523  map<char32, char> all_plus_number_grouping_symbols_;
524
525  // Map of country calling codes that use a mobile token before the area code.
526  // One example of when this is relevant is when determining the length of the
527  // national destination code, which should be the length of the area code plus
528  // the length of the mobile token.
529  map<int, char> mobile_token_mappings_;
530
531  // Pattern that makes it easy to distinguish whether a region has a unique
532  // international dialing prefix or not. If a region has a unique international
533  // prefix (e.g. 011 in USA), it will be represented as a string that contains
534  // a sequence of ASCII digits. If there are multiple available international
535  // prefixes in a region, they will be represented as a regex string that
536  // always contains character(s) other than ASCII digits.
537  // Note this regex also includes tilde, which signals waiting for the tone.
538  scoped_ptr<const RegExp> unique_international_prefix_;
539
540  scoped_ptr<const RegExp> digits_pattern_;
541  scoped_ptr<const RegExp> capturing_digit_pattern_;
542  scoped_ptr<const RegExp> capturing_ascii_digits_pattern_;
543
544  // Regular expression of acceptable characters that may start a phone number
545  // for the purposes of parsing. This allows us to strip away meaningless
546  // prefixes to phone numbers that may be mistakenly given to us. This consists
547  // of digits, the plus symbol and arabic-indic digits. This does not contain
548  // alpha characters, although they may be used later in the number. It also
549  // does not include other punctuation, as this will be stripped later during
550  // parsing and is of no information value when parsing a number. The string
551  // starting with this valid character is captured.
552  // This corresponds to VALID_START_CHAR in the java version.
553  scoped_ptr<const RegExp> valid_start_char_pattern_;
554
555  // Regular expression of valid characters before a marker that might indicate
556  // a second number.
557  scoped_ptr<const RegExp> capture_up_to_second_number_start_pattern_;
558
559  // Regular expression of trailing characters that we want to remove. We remove
560  // all characters that are not alpha or numerical characters. The hash
561  // character is retained here, as it may signify the previous block was an
562  // extension. Note the capturing block at the start to capture the rest of the
563  // number if this was a match.
564  // This corresponds to UNWANTED_END_CHAR_PATTERN in the java version.
565  scoped_ptr<const RegExp> unwanted_end_char_pattern_;
566
567  // Regular expression of groups of valid punctuation characters.
568  scoped_ptr<const RegExp> separator_pattern_;
569
570  // Regexp of all possible ways to write extensions, for use when finding phone
571  // numbers in text. This will be run as a case-insensitive regexp match. Wide
572  // character versions are also provided after each ASCII version.
573  const string extn_patterns_for_matching_;
574
575  // Regexp of all known extension prefixes used by different regions followed
576  // by 1 or more valid digits, for use when parsing.
577  scoped_ptr<const RegExp> extn_pattern_;
578
579  // We append optionally the extension pattern to the end here, as a valid
580  // phone number may have an extension prefix appended, followed by 1 or more
581  // digits.
582  scoped_ptr<const RegExp> valid_phone_number_pattern_;
583
584  // We use this pattern to check if the phone number has at least three letters
585  // in it - if so, then we treat it as a number where some phone-number digits
586  // are represented by letters.
587  scoped_ptr<const RegExp> valid_alpha_phone_pattern_;
588
589  scoped_ptr<const RegExp> first_group_capturing_pattern_;
590
591  scoped_ptr<const RegExp> carrier_code_pattern_;
592
593  scoped_ptr<const RegExp> plus_chars_pattern_;
594
595  PhoneNumberRegExpsAndMappings()
596      : punctuation_and_star_sign_(StrCat(PhoneNumberUtil::kValidPunctuation,
597                                          kStarSign)),
598        min_length_phone_number_pattern_(
599            StrCat(kDigits, "{", PhoneNumberUtil::kMinLengthForNsn, "}")),
600        valid_phone_number_(
601            StrCat(min_length_phone_number_pattern_, "|[",
602                   PhoneNumberUtil::kPlusChars, "]*(?:[",
603                   punctuation_and_star_sign_, "]*",
604                   kDigits, "){3,}[", kValidAlpha,
605                   punctuation_and_star_sign_, kDigits,
606                   "]*")),
607        extn_patterns_for_parsing_(
608            CreateExtnPattern(StrCat(",", kSingleExtnSymbolsForMatching))),
609        regexp_factory_(new RegExpFactory()),
610        regexp_cache_(new RegExpCache(*regexp_factory_.get(), 128)),
611        diallable_char_mappings_(),
612        alpha_mappings_(),
613        alpha_phone_mappings_(),
614        all_plus_number_grouping_symbols_(),
615        mobile_token_mappings_(),
616        unique_international_prefix_(regexp_factory_->CreateRegExp(
617            /* "[\\d]+(?:[~⁓∼~][\\d]+)?" */
618            "[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?")),
619        digits_pattern_(
620            regexp_factory_->CreateRegExp(StrCat("[", kDigits, "]*"))),
621        capturing_digit_pattern_(
622            regexp_factory_->CreateRegExp(StrCat("([", kDigits, "])"))),
623        capturing_ascii_digits_pattern_(
624            regexp_factory_->CreateRegExp("(\\d+)")),
625        valid_start_char_pattern_(regexp_factory_->CreateRegExp(
626            StrCat("[", PhoneNumberUtil::kPlusChars, kDigits, "]"))),
627        capture_up_to_second_number_start_pattern_(
628            regexp_factory_->CreateRegExp(
629                PhoneNumberUtil::kCaptureUpToSecondNumberStart)),
630        unwanted_end_char_pattern_(
631            regexp_factory_->CreateRegExp("[^\\p{N}\\p{L}#]")),
632        separator_pattern_(
633            regexp_factory_->CreateRegExp(
634                StrCat("[", PhoneNumberUtil::kValidPunctuation, "]+"))),
635        extn_patterns_for_matching_(
636            CreateExtnPattern(kSingleExtnSymbolsForMatching)),
637        extn_pattern_(regexp_factory_->CreateRegExp(
638            StrCat("(?i)(?:", extn_patterns_for_parsing_, ")$"))),
639        valid_phone_number_pattern_(regexp_factory_->CreateRegExp(
640            StrCat("(?i)", valid_phone_number_,
641                   "(?:", extn_patterns_for_parsing_, ")?"))),
642        valid_alpha_phone_pattern_(regexp_factory_->CreateRegExp(
643            StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}"))),
644        // The first_group_capturing_pattern was originally set to $1 but there
645        // are some countries for which the first group is not used in the
646        // national pattern (e.g. Argentina) so the $1 group does not match
647        // correctly. Therefore, we use \d, so that the first group actually
648        // used in the pattern will be matched.
649        first_group_capturing_pattern_(
650            regexp_factory_->CreateRegExp("(\\$\\d)")),
651        carrier_code_pattern_(regexp_factory_->CreateRegExp("\\$CC")),
652        plus_chars_pattern_(
653            regexp_factory_->CreateRegExp(
654                StrCat("[", PhoneNumberUtil::kPlusChars, "]+"))) {
655    InitializeMapsAndSets();
656  }
657
658 private:
659  DISALLOW_COPY_AND_ASSIGN(PhoneNumberRegExpsAndMappings);
660};
661
662// Private constructor. Also takes care of initialisation.
663PhoneNumberUtil::PhoneNumberUtil()
664    : logger_(Logger::set_logger_impl(new NullLogger())),
665      reg_exps_(new PhoneNumberRegExpsAndMappings),
666      country_calling_code_to_region_code_map_(new vector<IntRegionsPair>()),
667      nanpa_regions_(new set<string>()),
668      region_to_metadata_map_(new map<string, PhoneMetadata>()),
669      country_code_to_non_geographical_metadata_map_(
670          new map<int, PhoneMetadata>) {
671  Logger::set_logger_impl(logger_.get());
672  // TODO: Update the java version to put the contents of the init
673  // method inside the constructor as well to keep both in sync.
674  PhoneMetadataCollection metadata_collection;
675  if (!LoadCompiledInMetadata(&metadata_collection)) {
676    LOG(DFATAL) << "Could not parse compiled-in metadata.";
677    return;
678  }
679  // Storing data in a temporary map to make it easier to find other regions
680  // that share a country calling code when inserting data.
681  map<int, list<string>* > country_calling_code_to_region_map;
682  for (RepeatedPtrField<PhoneMetadata>::const_iterator it =
683           metadata_collection.metadata().begin();
684       it != metadata_collection.metadata().end();
685       ++it) {
686    const string& region_code = it->id();
687    if (region_code == RegionCode::GetUnknown()) {
688      continue;
689    }
690
691    int country_calling_code = it->country_code();
692    if (kRegionCodeForNonGeoEntity == region_code) {
693      country_code_to_non_geographical_metadata_map_->insert(
694          make_pair(country_calling_code, *it));
695    } else {
696      region_to_metadata_map_->insert(make_pair(region_code, *it));
697    }
698    map<int, list<string>* >::iterator calling_code_in_map =
699        country_calling_code_to_region_map.find(country_calling_code);
700    if (calling_code_in_map != country_calling_code_to_region_map.end()) {
701      if (it->main_country_for_code()) {
702        calling_code_in_map->second->push_front(region_code);
703      } else {
704        calling_code_in_map->second->push_back(region_code);
705      }
706    } else {
707      // For most country calling codes, there will be only one region code.
708      list<string>* list_with_region_code = new list<string>();
709      list_with_region_code->push_back(region_code);
710      country_calling_code_to_region_map.insert(
711          make_pair(country_calling_code, list_with_region_code));
712    }
713    if (country_calling_code == kNanpaCountryCode) {
714        nanpa_regions_->insert(region_code);
715    }
716  }
717
718  country_calling_code_to_region_code_map_->insert(
719      country_calling_code_to_region_code_map_->begin(),
720      country_calling_code_to_region_map.begin(),
721      country_calling_code_to_region_map.end());
722  // Sort all the pairs in ascending order according to country calling code.
723  sort(country_calling_code_to_region_code_map_->begin(),
724       country_calling_code_to_region_code_map_->end(),
725       OrderByFirst());
726}
727
728PhoneNumberUtil::~PhoneNumberUtil() {
729  STLDeleteContainerPairSecondPointers(
730      country_calling_code_to_region_code_map_->begin(),
731      country_calling_code_to_region_code_map_->end());
732}
733
734void PhoneNumberUtil::GetSupportedRegions(set<string>* regions) const {
735  DCHECK(regions);
736  for (map<string, PhoneMetadata>::const_iterator it =
737       region_to_metadata_map_->begin(); it != region_to_metadata_map_->end();
738       ++it) {
739    regions->insert(it->first);
740  }
741}
742
743// Public wrapper function to get a PhoneNumberUtil instance with the default
744// metadata file.
745// static
746PhoneNumberUtil* PhoneNumberUtil::GetInstance() {
747  return Singleton<PhoneNumberUtil>::GetInstance();
748}
749
750const string& PhoneNumberUtil::GetExtnPatternsForMatching() const {
751  return reg_exps_->extn_patterns_for_matching_;
752}
753
754bool PhoneNumberUtil::StartsWithPlusCharsPattern(const string& number)
755    const {
756  const scoped_ptr<RegExpInput> number_string_piece(
757      reg_exps_->regexp_factory_->CreateInput(number));
758  return reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get());
759}
760
761bool PhoneNumberUtil::ContainsOnlyValidDigits(const string& s) const {
762  return reg_exps_->digits_pattern_->FullMatch(s);
763}
764
765void PhoneNumberUtil::TrimUnwantedEndChars(string* number) const {
766  DCHECK(number);
767  UnicodeText number_as_unicode;
768  number_as_unicode.PointToUTF8(number->data(), number->size());
769  char current_char[5];
770  int len;
771  UnicodeText::const_reverse_iterator reverse_it(number_as_unicode.end());
772  for (; reverse_it.base() != number_as_unicode.begin(); ++reverse_it) {
773    len = reverse_it.get_utf8(current_char);
774    current_char[len] = '\0';
775    if (!reg_exps_->unwanted_end_char_pattern_->FullMatch(current_char)) {
776      break;
777    }
778  }
779
780  number->assign(UnicodeText::UTF8Substring(number_as_unicode.begin(),
781                                            reverse_it.base()));
782}
783
784bool PhoneNumberUtil::IsFormatEligibleForAsYouTypeFormatter(
785    const string& format) const {
786  // A pattern that is used to determine if a numberFormat under
787  // availableFormats is eligible to be used by the AYTF. It is eligible when
788  // the format element under numberFormat contains groups of the dollar sign
789  // followed by a single digit, separated by valid phone number punctuation.
790  // This prevents invalid punctuation (such as the star sign in Israeli star
791  // numbers) getting into the output of the AYTF.
792  const RegExp& eligible_format_pattern = reg_exps_->regexp_cache_->GetRegExp(
793      StrCat("[", kValidPunctuation, "]*", "(\\$\\d", "[",
794             kValidPunctuation, "]*)+"));
795  return eligible_format_pattern.FullMatch(format);
796}
797
798bool PhoneNumberUtil::FormattingRuleHasFirstGroupOnly(
799    const string& national_prefix_formatting_rule) const {
800  // A pattern that is used to determine if the national prefix formatting rule
801  // has the first group only, i.e., does not start with the national prefix.
802  // Note that the pattern explicitly allows for unbalanced parentheses.
803  const RegExp& first_group_only_prefix_pattern =
804      reg_exps_->regexp_cache_->GetRegExp("\\(?\\$1\\)?");
805  return national_prefix_formatting_rule.empty() ||
806      first_group_only_prefix_pattern.FullMatch(
807          national_prefix_formatting_rule);
808}
809
810void PhoneNumberUtil::GetNddPrefixForRegion(const string& region_code,
811                                            bool strip_non_digits,
812                                            string* national_prefix) const {
813  DCHECK(national_prefix);
814  const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
815  if (!metadata) {
816    LOG(WARNING) << "Invalid or unknown region code (" << region_code
817                 << ") provided.";
818    return;
819  }
820  national_prefix->assign(metadata->national_prefix());
821  if (strip_non_digits) {
822    // Note: if any other non-numeric symbols are ever used in national
823    // prefixes, these would have to be removed here as well.
824    strrmm(national_prefix, "~");
825  }
826}
827
828bool PhoneNumberUtil::IsValidRegionCode(const string& region_code) const {
829  return (region_to_metadata_map_->find(region_code) !=
830          region_to_metadata_map_->end());
831}
832
833bool PhoneNumberUtil::HasValidCountryCallingCode(
834    int country_calling_code) const {
835  // Create an IntRegionsPair with the country_code passed in, and use it to
836  // locate the pair with the same country_code in the sorted vector.
837  IntRegionsPair target_pair;
838  target_pair.first = country_calling_code;
839  return (binary_search(country_calling_code_to_region_code_map_->begin(),
840                        country_calling_code_to_region_code_map_->end(),
841                        target_pair, OrderByFirst()));
842}
843
844// Returns a pointer to the phone metadata for the appropriate region or NULL
845// if the region code is invalid or unknown.
846const PhoneMetadata* PhoneNumberUtil::GetMetadataForRegion(
847    const string& region_code) const {
848  map<string, PhoneMetadata>::const_iterator it =
849      region_to_metadata_map_->find(region_code);
850  if (it != region_to_metadata_map_->end()) {
851    return &it->second;
852  }
853  return NULL;
854}
855
856const PhoneMetadata* PhoneNumberUtil::GetMetadataForNonGeographicalRegion(
857    int country_calling_code) const {
858  map<int, PhoneMetadata>::const_iterator it =
859      country_code_to_non_geographical_metadata_map_->find(
860          country_calling_code);
861  if (it != country_code_to_non_geographical_metadata_map_->end()) {
862    return &it->second;
863  }
864  return NULL;
865}
866
867void PhoneNumberUtil::Format(const PhoneNumber& number,
868                             PhoneNumberFormat number_format,
869                             string* formatted_number) const {
870  DCHECK(formatted_number);
871  if (number.national_number() == 0) {
872    const string& raw_input = number.raw_input();
873    if (!raw_input.empty()) {
874      // Unparseable numbers that kept their raw input just use that.
875      // This is the only case where a number can be formatted as E164 without a
876      // leading '+' symbol (but the original number wasn't parseable anyway).
877      // TODO: Consider removing the 'if' above so that unparseable
878      // strings without raw input format to the empty string instead of "+00".
879      formatted_number->assign(raw_input);
880      return;
881    }
882  }
883  int country_calling_code = number.country_code();
884  string national_significant_number;
885  GetNationalSignificantNumber(number, &national_significant_number);
886  if (number_format == E164) {
887    // Early exit for E164 case (even if the country calling code is invalid)
888    // since no formatting of the national number needs to be applied.
889    // Extensions are not formatted.
890    formatted_number->assign(national_significant_number);
891    PrefixNumberWithCountryCallingCode(country_calling_code, E164,
892                                       formatted_number);
893    return;
894  }
895  if (!HasValidCountryCallingCode(country_calling_code)) {
896    formatted_number->assign(national_significant_number);
897    return;
898  }
899  // Note here that all NANPA formatting rules are contained by US, so we use
900  // that to format NANPA numbers. The same applies to Russian Fed regions -
901  // rules are contained by Russia. French Indian Ocean country rules are
902  // contained by Réunion.
903  string region_code;
904  GetRegionCodeForCountryCode(country_calling_code, &region_code);
905  // Metadata cannot be NULL because the country calling code is valid (which
906  // means that the region code cannot be ZZ and must be one of our supported
907  // region codes).
908  const PhoneMetadata* metadata =
909      GetMetadataForRegionOrCallingCode(country_calling_code, region_code);
910  FormatNsn(national_significant_number, *metadata, number_format,
911            formatted_number);
912  MaybeAppendFormattedExtension(number, *metadata, number_format,
913                                formatted_number);
914  PrefixNumberWithCountryCallingCode(country_calling_code, number_format,
915                                     formatted_number);
916}
917
918void PhoneNumberUtil::FormatByPattern(
919    const PhoneNumber& number,
920    PhoneNumberFormat number_format,
921    const RepeatedPtrField<NumberFormat>& user_defined_formats,
922    string* formatted_number) const {
923  DCHECK(formatted_number);
924  int country_calling_code = number.country_code();
925  // Note GetRegionCodeForCountryCode() is used because formatting information
926  // for regions which share a country calling code is contained by only one
927  // region for performance reasons. For example, for NANPA regions it will be
928  // contained in the metadata for US.
929  string national_significant_number;
930  GetNationalSignificantNumber(number, &national_significant_number);
931  if (!HasValidCountryCallingCode(country_calling_code)) {
932    formatted_number->assign(national_significant_number);
933    return;
934  }
935  string region_code;
936  GetRegionCodeForCountryCode(country_calling_code, &region_code);
937  // Metadata cannot be NULL because the country calling code is valid.
938  const PhoneMetadata* metadata =
939      GetMetadataForRegionOrCallingCode(country_calling_code, region_code);
940  const NumberFormat* formatting_pattern =
941      ChooseFormattingPatternForNumber(user_defined_formats,
942                                       national_significant_number);
943  if (!formatting_pattern) {
944    // If no pattern above is matched, we format the number as a whole.
945    formatted_number->assign(national_significant_number);
946  } else {
947    NumberFormat num_format_copy;
948    // Before we do a replacement of the national prefix pattern $NP with the
949    // national prefix, we need to copy the rule so that subsequent replacements
950    // for different numbers have the appropriate national prefix.
951    num_format_copy.MergeFrom(*formatting_pattern);
952    string national_prefix_formatting_rule(
953        formatting_pattern->national_prefix_formatting_rule());
954    if (!national_prefix_formatting_rule.empty()) {
955      const string& national_prefix = metadata->national_prefix();
956      if (!national_prefix.empty()) {
957        // Replace $NP with national prefix and $FG with the first group ($1).
958        GlobalReplaceSubstring("$NP", national_prefix,
959                               &national_prefix_formatting_rule);
960        GlobalReplaceSubstring("$FG", "$1",
961                               &national_prefix_formatting_rule);
962        num_format_copy.set_national_prefix_formatting_rule(
963            national_prefix_formatting_rule);
964      } else {
965        // We don't want to have a rule for how to format the national prefix if
966        // there isn't one.
967        num_format_copy.clear_national_prefix_formatting_rule();
968      }
969    }
970    FormatNsnUsingPattern(national_significant_number, num_format_copy,
971                          number_format, formatted_number);
972  }
973  MaybeAppendFormattedExtension(number, *metadata, NATIONAL, formatted_number);
974  PrefixNumberWithCountryCallingCode(country_calling_code, number_format,
975                                     formatted_number);
976}
977
978void PhoneNumberUtil::FormatNationalNumberWithCarrierCode(
979    const PhoneNumber& number,
980    const string& carrier_code,
981    string* formatted_number) const {
982  int country_calling_code = number.country_code();
983  string national_significant_number;
984  GetNationalSignificantNumber(number, &national_significant_number);
985  if (!HasValidCountryCallingCode(country_calling_code)) {
986    formatted_number->assign(national_significant_number);
987    return;
988  }
989
990  // Note GetRegionCodeForCountryCode() is used because formatting information
991  // for regions which share a country calling code is contained by only one
992  // region for performance reasons. For example, for NANPA regions it will be
993  // contained in the metadata for US.
994  string region_code;
995  GetRegionCodeForCountryCode(country_calling_code, &region_code);
996  // Metadata cannot be NULL because the country calling code is valid.
997  const PhoneMetadata* metadata =
998      GetMetadataForRegionOrCallingCode(country_calling_code, region_code);
999  FormatNsnWithCarrier(national_significant_number, *metadata, NATIONAL,
1000                       carrier_code, formatted_number);
1001  MaybeAppendFormattedExtension(number, *metadata, NATIONAL, formatted_number);
1002  PrefixNumberWithCountryCallingCode(country_calling_code, NATIONAL,
1003                                     formatted_number);
1004}
1005
1006const PhoneMetadata* PhoneNumberUtil::GetMetadataForRegionOrCallingCode(
1007      int country_calling_code, const string& region_code) const {
1008  return kRegionCodeForNonGeoEntity == region_code
1009      ? GetMetadataForNonGeographicalRegion(country_calling_code)
1010      : GetMetadataForRegion(region_code);
1011}
1012
1013void PhoneNumberUtil::FormatNationalNumberWithPreferredCarrierCode(
1014    const PhoneNumber& number,
1015    const string& fallback_carrier_code,
1016    string* formatted_number) const {
1017  FormatNationalNumberWithCarrierCode(
1018      number,
1019      number.has_preferred_domestic_carrier_code()
1020          ? number.preferred_domestic_carrier_code()
1021          : fallback_carrier_code,
1022      formatted_number);
1023}
1024
1025void PhoneNumberUtil::FormatNumberForMobileDialing(
1026    const PhoneNumber& number,
1027    const string& calling_from,
1028    bool with_formatting,
1029    string* formatted_number) const {
1030  int country_calling_code = number.country_code();
1031  if (!HasValidCountryCallingCode(country_calling_code)) {
1032    formatted_number->assign(number.has_raw_input() ? number.raw_input() : "");
1033    return;
1034  }
1035
1036  formatted_number->assign("");
1037  // Clear the extension, as that part cannot normally be dialed together with
1038  // the main number.
1039  PhoneNumber number_no_extension(number);
1040  number_no_extension.clear_extension();
1041  string region_code;
1042  GetRegionCodeForCountryCode(country_calling_code, &region_code);
1043  if (calling_from == region_code) {
1044    PhoneNumberType number_type = GetNumberType(number_no_extension);
1045    bool is_fixed_line_or_mobile =
1046        (number_type == FIXED_LINE) || (number_type == MOBILE) ||
1047        (number_type == FIXED_LINE_OR_MOBILE);
1048    // Carrier codes may be needed in some countries. We handle this here.
1049    if ((region_code == "CO") && (number_type == FIXED_LINE)) {
1050      FormatNationalNumberWithCarrierCode(
1051          number_no_extension, kColombiaMobileToFixedLinePrefix,
1052          formatted_number);
1053    } else if ((region_code == "BR") && (is_fixed_line_or_mobile)) {
1054      if (number_no_extension.has_preferred_domestic_carrier_code()) {
1055      FormatNationalNumberWithPreferredCarrierCode(number_no_extension, "",
1056                                                   formatted_number);
1057      } else {
1058        // Brazilian fixed line and mobile numbers need to be dialed with a
1059        // carrier code when called within Brazil. Without that, most of the
1060        // carriers won't connect the call. Because of that, we return an empty
1061        // string here.
1062        formatted_number->assign("");
1063      }
1064    } else if (region_code == "HU") {
1065      // The national format for HU numbers doesn't contain the national prefix,
1066      // because that is how numbers are normally written down. However, the
1067      // national prefix is obligatory when dialing from a mobile phone. As a
1068      // result, we add it back here.
1069      Format(number_no_extension, NATIONAL, formatted_number);
1070      string hu_national_prefix;
1071      GetNddPrefixForRegion(region_code, true /* strip non-digits */,
1072                            &hu_national_prefix);
1073      formatted_number->assign(
1074          StrCat(hu_national_prefix, " ", *formatted_number));
1075    } else {
1076      // For NANPA countries, non-geographical countries, Mexican and Chilean
1077      // fixed line and mobile numbers, we output international format for
1078      // numbers that can be dialed internationally as that always works.
1079      if ((country_calling_code == kNanpaCountryCode ||
1080           region_code == kRegionCodeForNonGeoEntity ||
1081           // MX fixed line and mobile numbers should always be formatted in
1082           // international format, even when dialed within MX. For national
1083           // format to work, a carrier code needs to be used, and the correct
1084           // carrier code depends on if the caller and callee are from the same
1085           // local area. It is trickier to get that to work correctly than
1086           // using international format, which is tested to work fine on all
1087           // carriers.
1088           // CL fixed line numbers need the national prefix when dialing in the
1089           // national format, but don't have it when used for display. The
1090           // reverse is true for mobile numbers. As a result, we output them in
1091           // the international format to make it work.
1092           ((region_code == "MX" || region_code == "CL") &&
1093               is_fixed_line_or_mobile)) &&
1094          CanBeInternationallyDialled(number_no_extension)) {
1095        Format(number_no_extension, INTERNATIONAL, formatted_number);
1096      } else {
1097        Format(number_no_extension, NATIONAL, formatted_number);
1098      }
1099    }
1100  } else if (CanBeInternationallyDialled(number_no_extension)) {
1101    with_formatting
1102        ? Format(number_no_extension, INTERNATIONAL, formatted_number)
1103        : Format(number_no_extension, E164, formatted_number);
1104    return;
1105  }
1106  if (!with_formatting) {
1107    NormalizeDiallableCharsOnly(formatted_number);
1108  }
1109}
1110
1111void PhoneNumberUtil::FormatOutOfCountryCallingNumber(
1112    const PhoneNumber& number,
1113    const string& calling_from,
1114    string* formatted_number) const {
1115  DCHECK(formatted_number);
1116  if (!IsValidRegionCode(calling_from)) {
1117    LOG(WARNING) << "Trying to format number from invalid region "
1118                 << calling_from
1119                 << ". International formatting applied.";
1120    Format(number, INTERNATIONAL, formatted_number);
1121    return;
1122  }
1123  int country_code = number.country_code();
1124  string national_significant_number;
1125  GetNationalSignificantNumber(number, &national_significant_number);
1126  if (!HasValidCountryCallingCode(country_code)) {
1127    formatted_number->assign(national_significant_number);
1128    return;
1129  }
1130  if (country_code == kNanpaCountryCode) {
1131    if (IsNANPACountry(calling_from)) {
1132      // For NANPA regions, return the national format for these regions but
1133      // prefix it with the country calling code.
1134      Format(number, NATIONAL, formatted_number);
1135      formatted_number->insert(0, StrCat(country_code, " "));
1136      return;
1137    }
1138  } else if (country_code == GetCountryCodeForValidRegion(calling_from)) {
1139    // If neither region is a NANPA region, then we check to see if the
1140    // country calling code of the number and the country calling code of the
1141    // region we are calling from are the same.
1142    // For regions that share a country calling code, the country calling code
1143    // need not be dialled. This also applies when dialling within a region, so
1144    // this if clause covers both these cases.
1145    // Technically this is the case for dialling from la Réunion to other
1146    // overseas departments of France (French Guiana, Martinique, Guadeloupe),
1147    // but not vice versa - so we don't cover this edge case for now and for
1148    // those cases return the version including country calling code.
1149    // Details here:
1150    // http://www.petitfute.com/voyage/225-info-pratiques-reunion
1151    Format(number, NATIONAL, formatted_number);
1152    return;
1153  }
1154  // Metadata cannot be NULL because we checked 'IsValidRegionCode()' above.
1155  const PhoneMetadata* metadata_calling_from =
1156      GetMetadataForRegion(calling_from);
1157  const string& international_prefix =
1158      metadata_calling_from->international_prefix();
1159
1160  // For regions that have multiple international prefixes, the international
1161  // format of the number is returned, unless there is a preferred international
1162  // prefix.
1163  const string international_prefix_for_formatting(
1164      reg_exps_->unique_international_prefix_->FullMatch(international_prefix)
1165      ? international_prefix
1166      : metadata_calling_from->preferred_international_prefix());
1167
1168  string region_code;
1169  GetRegionCodeForCountryCode(country_code, &region_code);
1170  // Metadata cannot be NULL because the country_code is valid.
1171  const PhoneMetadata* metadata_for_region =
1172      GetMetadataForRegionOrCallingCode(country_code, region_code);
1173  FormatNsn(national_significant_number, *metadata_for_region, INTERNATIONAL,
1174            formatted_number);
1175  MaybeAppendFormattedExtension(number, *metadata_for_region, INTERNATIONAL,
1176                                formatted_number);
1177  if (!international_prefix_for_formatting.empty()) {
1178    formatted_number->insert(
1179        0, StrCat(international_prefix_for_formatting, " ", country_code, " "));
1180  } else {
1181    PrefixNumberWithCountryCallingCode(country_code, INTERNATIONAL,
1182                                       formatted_number);
1183  }
1184}
1185
1186void PhoneNumberUtil::FormatInOriginalFormat(const PhoneNumber& number,
1187                                             const string& region_calling_from,
1188                                             string* formatted_number) const {
1189  DCHECK(formatted_number);
1190
1191  if (number.has_raw_input() &&
1192      (HasUnexpectedItalianLeadingZero(number) ||
1193       !HasFormattingPatternForNumber(number))) {
1194    // We check if we have the formatting pattern because without that, we might
1195    // format the number as a group without national prefix.
1196    formatted_number->assign(number.raw_input());
1197    return;
1198  }
1199  if (!number.has_country_code_source()) {
1200    Format(number, NATIONAL, formatted_number);
1201    return;
1202  }
1203  switch (number.country_code_source()) {
1204    case PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN:
1205      Format(number, INTERNATIONAL, formatted_number);
1206      break;
1207    case PhoneNumber::FROM_NUMBER_WITH_IDD:
1208      FormatOutOfCountryCallingNumber(number, region_calling_from,
1209                                      formatted_number);
1210      break;
1211    case PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN:
1212      Format(number, INTERNATIONAL, formatted_number);
1213      formatted_number->erase(formatted_number->begin());
1214      break;
1215    case PhoneNumber::FROM_DEFAULT_COUNTRY:
1216      // Fall-through to default case.
1217    default:
1218      string region_code;
1219      GetRegionCodeForCountryCode(number.country_code(), &region_code);
1220      // We strip non-digits from the NDD here, and from the raw input later, so
1221      // that we can compare them easily.
1222      string national_prefix;
1223      GetNddPrefixForRegion(region_code, true /* strip non-digits */,
1224                            &national_prefix);
1225      if (national_prefix.empty()) {
1226        // If the region doesn't have a national prefix at all, we can safely
1227        // return the national format without worrying about a national prefix
1228        // being added.
1229        Format(number, NATIONAL, formatted_number);
1230        break;
1231      }
1232      // Otherwise, we check if the original number was entered with a national
1233      // prefix.
1234      if (RawInputContainsNationalPrefix(number.raw_input(), national_prefix,
1235                                         region_code)) {
1236        // If so, we can safely return the national format.
1237        Format(number, NATIONAL, formatted_number);
1238        break;
1239      }
1240      // Metadata cannot be NULL here because GetNddPrefixForRegion() (above)
1241      // leaves the prefix empty if there is no metadata for the region.
1242      const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
1243      string national_number;
1244      GetNationalSignificantNumber(number, &national_number);
1245      // This shouldn't be NULL, because we have checked that above with
1246      // HasFormattingPatternForNumber.
1247      const NumberFormat* format_rule =
1248          ChooseFormattingPatternForNumber(metadata->number_format(),
1249                                           national_number);
1250      // The format rule could still be NULL here if the national number was 0
1251      // and there was no raw input (this should not be possible for numbers
1252      // generated by the phonenumber library as they would also not have a
1253      // country calling code and we would have exited earlier).
1254      if (!format_rule) {
1255        Format(number, NATIONAL, formatted_number);
1256        break;
1257      }
1258      // When the format we apply to this number doesn't contain national
1259      // prefix, we can just return the national format.
1260      // TODO: Refactor the code below with the code in
1261      // IsNationalPrefixPresentIfRequired.
1262      string candidate_national_prefix_rule(
1263          format_rule->national_prefix_formatting_rule());
1264      // We assume that the first-group symbol will never be _before_ the
1265      // national prefix.
1266      if (!candidate_national_prefix_rule.empty()) {
1267        candidate_national_prefix_rule.erase(
1268            candidate_national_prefix_rule.find("$1"));
1269        NormalizeDigitsOnly(&candidate_national_prefix_rule);
1270      }
1271      if (candidate_national_prefix_rule.empty()) {
1272        // National prefix not used when formatting this number.
1273        Format(number, NATIONAL, formatted_number);
1274        break;
1275      }
1276      // Otherwise, we need to remove the national prefix from our output.
1277      RepeatedPtrField<NumberFormat> number_formats;
1278      NumberFormat* number_format = number_formats.Add();
1279      number_format->MergeFrom(*format_rule);
1280      number_format->clear_national_prefix_formatting_rule();
1281      FormatByPattern(number, NATIONAL, number_formats, formatted_number);
1282      break;
1283  }
1284  // If no digit is inserted/removed/modified as a result of our formatting, we
1285  // return the formatted phone number; otherwise we return the raw input the
1286  // user entered.
1287  if (!formatted_number->empty() && !number.raw_input().empty()) {
1288    string normalized_formatted_number(*formatted_number);
1289    NormalizeDiallableCharsOnly(&normalized_formatted_number);
1290    string normalized_raw_input(number.raw_input());
1291    NormalizeDiallableCharsOnly(&normalized_raw_input);
1292    if (normalized_formatted_number != normalized_raw_input) {
1293      formatted_number->assign(number.raw_input());
1294    }
1295  }
1296}
1297
1298// Check if raw_input, which is assumed to be in the national format, has a
1299// national prefix. The national prefix is assumed to be in digits-only form.
1300bool PhoneNumberUtil::RawInputContainsNationalPrefix(
1301    const string& raw_input,
1302    const string& national_prefix,
1303    const string& region_code) const {
1304  string normalized_national_number(raw_input);
1305  NormalizeDigitsOnly(&normalized_national_number);
1306  if (HasPrefixString(normalized_national_number, national_prefix)) {
1307    // Some Japanese numbers (e.g. 00777123) might be mistaken to contain
1308    // the national prefix when written without it (e.g. 0777123) if we just
1309    // do prefix matching. To tackle that, we check the validity of the
1310    // number if the assumed national prefix is removed (777123 won't be
1311    // valid in Japan).
1312    PhoneNumber number_without_national_prefix;
1313    if (Parse(normalized_national_number.substr(national_prefix.length()),
1314              region_code, &number_without_national_prefix)
1315        == NO_PARSING_ERROR) {
1316      return IsValidNumber(number_without_national_prefix);
1317    }
1318  }
1319  return false;
1320}
1321
1322bool PhoneNumberUtil::HasUnexpectedItalianLeadingZero(
1323    const PhoneNumber& number) const {
1324  return number.has_italian_leading_zero() &&
1325      !IsLeadingZeroPossible(number.country_code());
1326}
1327
1328bool PhoneNumberUtil::HasFormattingPatternForNumber(
1329    const PhoneNumber& number) const {
1330  int country_calling_code = number.country_code();
1331  string region_code;
1332  GetRegionCodeForCountryCode(country_calling_code, &region_code);
1333  const PhoneMetadata* metadata =
1334      GetMetadataForRegionOrCallingCode(country_calling_code, region_code);
1335  if (!metadata) {
1336    return false;
1337  }
1338  string national_number;
1339  GetNationalSignificantNumber(number, &national_number);
1340  const NumberFormat* format_rule =
1341      ChooseFormattingPatternForNumber(metadata->number_format(),
1342                                       national_number);
1343  return format_rule;
1344}
1345
1346void PhoneNumberUtil::FormatOutOfCountryKeepingAlphaChars(
1347    const PhoneNumber& number,
1348    const string& calling_from,
1349    string* formatted_number) const {
1350  // If there is no raw input, then we can't keep alpha characters because there
1351  // aren't any. In this case, we return FormatOutOfCountryCallingNumber.
1352  if (number.raw_input().empty()) {
1353    FormatOutOfCountryCallingNumber(number, calling_from, formatted_number);
1354    return;
1355  }
1356  int country_code = number.country_code();
1357  if (!HasValidCountryCallingCode(country_code)) {
1358    formatted_number->assign(number.raw_input());
1359    return;
1360  }
1361  // Strip any prefix such as country calling code, IDD, that was present. We do
1362  // this by comparing the number in raw_input with the parsed number.
1363  string raw_input_copy(number.raw_input());
1364  // Normalize punctuation. We retain number grouping symbols such as " " only.
1365  NormalizeHelper(reg_exps_->all_plus_number_grouping_symbols_, true,
1366                  &raw_input_copy);
1367  // Now we trim everything before the first three digits in the parsed number.
1368  // We choose three because all valid alpha numbers have 3 digits at the start
1369  // - if it does not, then we don't trim anything at all. Similarly, if the
1370  // national number was less than three digits, we don't trim anything at all.
1371  string national_number;
1372  GetNationalSignificantNumber(number, &national_number);
1373  if (national_number.length() > 3) {
1374    size_t first_national_number_digit =
1375        raw_input_copy.find(national_number.substr(0, 3));
1376    if (first_national_number_digit != string::npos) {
1377      raw_input_copy = raw_input_copy.substr(first_national_number_digit);
1378    }
1379  }
1380  const PhoneMetadata* metadata = GetMetadataForRegion(calling_from);
1381  if (country_code == kNanpaCountryCode) {
1382    if (IsNANPACountry(calling_from)) {
1383      StrAppend(formatted_number, country_code, " ", raw_input_copy);
1384      return;
1385    }
1386  } else if (metadata &&
1387             country_code == GetCountryCodeForValidRegion(calling_from)) {
1388    const NumberFormat* formatting_pattern =
1389        ChooseFormattingPatternForNumber(metadata->number_format(),
1390                                         national_number);
1391    if (!formatting_pattern) {
1392      // If no pattern above is matched, we format the original input.
1393      formatted_number->assign(raw_input_copy);
1394      return;
1395    }
1396    NumberFormat new_format;
1397    new_format.MergeFrom(*formatting_pattern);
1398    // The first group is the first group of digits that the user wrote
1399    // together.
1400    new_format.set_pattern("(\\d+)(.*)");
1401    // Here we just concatenate them back together after the national prefix
1402    // has been fixed.
1403    new_format.set_format("$1$2");
1404    // Now we format using this pattern instead of the default pattern, but
1405    // with the national prefix prefixed if necessary.
1406    // This will not work in the cases where the pattern (and not the
1407    // leading digits) decide whether a national prefix needs to be used, since
1408    // we have overridden the pattern to match anything, but that is not the
1409    // case in the metadata to date.
1410    FormatNsnUsingPattern(raw_input_copy, new_format, NATIONAL,
1411                          formatted_number);
1412    return;
1413  }
1414
1415  string international_prefix_for_formatting;
1416  // If an unsupported region-calling-from is entered, or a country with
1417  // multiple international prefixes, the international format of the number is
1418  // returned, unless there is a preferred international prefix.
1419  if (metadata) {
1420    const string& international_prefix = metadata->international_prefix();
1421    international_prefix_for_formatting =
1422        reg_exps_->unique_international_prefix_->FullMatch(international_prefix)
1423        ? international_prefix
1424        : metadata->preferred_international_prefix();
1425  }
1426  if (!international_prefix_for_formatting.empty()) {
1427    StrAppend(formatted_number, international_prefix_for_formatting, " ",
1428              country_code, " ", raw_input_copy);
1429  } else {
1430    // Invalid region entered as country-calling-from (so no metadata was found
1431    // for it) or the region chosen has multiple international dialling
1432    // prefixes.
1433    LOG(WARNING) << "Trying to format number from invalid region "
1434                 << calling_from
1435                 << ". International formatting applied.";
1436    formatted_number->assign(raw_input_copy);
1437    PrefixNumberWithCountryCallingCode(country_code, INTERNATIONAL,
1438                                       formatted_number);
1439  }
1440}
1441
1442const NumberFormat* PhoneNumberUtil::ChooseFormattingPatternForNumber(
1443    const RepeatedPtrField<NumberFormat>& available_formats,
1444    const string& national_number) const {
1445  for (RepeatedPtrField<NumberFormat>::const_iterator
1446       it = available_formats.begin(); it != available_formats.end(); ++it) {
1447    int size = it->leading_digits_pattern_size();
1448    if (size > 0) {
1449      const scoped_ptr<RegExpInput> number_copy(
1450          reg_exps_->regexp_factory_->CreateInput(national_number));
1451      // We always use the last leading_digits_pattern, as it is the most
1452      // detailed.
1453      if (!reg_exps_->regexp_cache_->GetRegExp(
1454              it->leading_digits_pattern(size - 1)).Consume(
1455                  number_copy.get())) {
1456        continue;
1457      }
1458    }
1459    const RegExp& pattern_to_match(
1460        reg_exps_->regexp_cache_->GetRegExp(it->pattern()));
1461    if (pattern_to_match.FullMatch(national_number)) {
1462      return &(*it);
1463    }
1464  }
1465  return NULL;
1466}
1467
1468// Note that carrier_code is optional - if an empty string, no carrier code
1469// replacement will take place.
1470void PhoneNumberUtil::FormatNsnUsingPatternWithCarrier(
1471    const string& national_number,
1472    const NumberFormat& formatting_pattern,
1473    PhoneNumberUtil::PhoneNumberFormat number_format,
1474    const string& carrier_code,
1475    string* formatted_number) const {
1476  DCHECK(formatted_number);
1477  string number_format_rule(formatting_pattern.format());
1478  if (number_format == PhoneNumberUtil::NATIONAL &&
1479      carrier_code.length() > 0 &&
1480      formatting_pattern.domestic_carrier_code_formatting_rule().length() > 0) {
1481    // Replace the $CC in the formatting rule with the desired carrier code.
1482    string carrier_code_formatting_rule =
1483        formatting_pattern.domestic_carrier_code_formatting_rule();
1484    reg_exps_->carrier_code_pattern_->Replace(&carrier_code_formatting_rule,
1485                                              carrier_code);
1486    reg_exps_->first_group_capturing_pattern_->
1487        Replace(&number_format_rule, carrier_code_formatting_rule);
1488  } else {
1489    // Use the national prefix formatting rule instead.
1490    string national_prefix_formatting_rule =
1491        formatting_pattern.national_prefix_formatting_rule();
1492    if (number_format == PhoneNumberUtil::NATIONAL &&
1493        national_prefix_formatting_rule.length() > 0) {
1494      // Apply the national_prefix_formatting_rule as the formatting_pattern
1495      // contains only information on how the national significant number
1496      // should be formatted at this point.
1497      reg_exps_->first_group_capturing_pattern_->Replace(
1498          &number_format_rule, national_prefix_formatting_rule);
1499    }
1500  }
1501  formatted_number->assign(national_number);
1502
1503  const RegExp& pattern_to_match(
1504      reg_exps_->regexp_cache_->GetRegExp(formatting_pattern.pattern()));
1505  pattern_to_match.GlobalReplace(formatted_number, number_format_rule);
1506
1507  if (number_format == RFC3966) {
1508    // First consume any leading punctuation, if any was present.
1509    const scoped_ptr<RegExpInput> number(
1510        reg_exps_->regexp_factory_->CreateInput(*formatted_number));
1511    if (reg_exps_->separator_pattern_->Consume(number.get())) {
1512      formatted_number->assign(number->ToString());
1513    }
1514    // Then replace all separators with a "-".
1515    reg_exps_->separator_pattern_->GlobalReplace(formatted_number, "-");
1516  }
1517}
1518
1519// Simple wrapper of FormatNsnUsingPatternWithCarrier for the common case of
1520// no carrier code.
1521void PhoneNumberUtil::FormatNsnUsingPattern(
1522    const string& national_number,
1523    const NumberFormat& formatting_pattern,
1524    PhoneNumberUtil::PhoneNumberFormat number_format,
1525    string* formatted_number) const {
1526  DCHECK(formatted_number);
1527  FormatNsnUsingPatternWithCarrier(national_number, formatting_pattern,
1528                                   number_format, "", formatted_number);
1529}
1530
1531void PhoneNumberUtil::FormatNsn(const string& number,
1532                                const PhoneMetadata& metadata,
1533                                PhoneNumberFormat number_format,
1534                                string* formatted_number) const {
1535  DCHECK(formatted_number);
1536  FormatNsnWithCarrier(number, metadata, number_format, "", formatted_number);
1537}
1538
1539// Note in some regions, the national number can be written in two completely
1540// different ways depending on whether it forms part of the NATIONAL format or
1541// INTERNATIONAL format. The number_format parameter here is used to specify
1542// which format to use for those cases. If a carrier_code is specified, this
1543// will be inserted into the formatted string to replace $CC.
1544void PhoneNumberUtil::FormatNsnWithCarrier(const string& number,
1545                                           const PhoneMetadata& metadata,
1546                                           PhoneNumberFormat number_format,
1547                                           const string& carrier_code,
1548                                           string* formatted_number) const {
1549  DCHECK(formatted_number);
1550  // When the intl_number_formats exists, we use that to format national number
1551  // for the INTERNATIONAL format instead of using the number_formats.
1552  const RepeatedPtrField<NumberFormat> available_formats =
1553      (metadata.intl_number_format_size() == 0 || number_format == NATIONAL)
1554      ? metadata.number_format()
1555      : metadata.intl_number_format();
1556  const NumberFormat* formatting_pattern =
1557      ChooseFormattingPatternForNumber(available_formats, number);
1558  if (!formatting_pattern) {
1559    formatted_number->assign(number);
1560  } else {
1561    FormatNsnUsingPatternWithCarrier(number, *formatting_pattern, number_format,
1562                                     carrier_code, formatted_number);
1563  }
1564}
1565
1566// Appends the formatted extension of a phone number, if the phone number had an
1567// extension specified.
1568void PhoneNumberUtil::MaybeAppendFormattedExtension(
1569    const PhoneNumber& number,
1570    const PhoneMetadata& metadata,
1571    PhoneNumberFormat number_format,
1572    string* formatted_number) const {
1573  DCHECK(formatted_number);
1574  if (number.has_extension() && number.extension().length() > 0) {
1575    if (number_format == RFC3966) {
1576      StrAppend(formatted_number, kRfc3966ExtnPrefix, number.extension());
1577    } else {
1578      if (metadata.has_preferred_extn_prefix()) {
1579        StrAppend(formatted_number, metadata.preferred_extn_prefix(),
1580                  number.extension());
1581      } else {
1582        StrAppend(formatted_number, kDefaultExtnPrefix, number.extension());
1583      }
1584    }
1585  }
1586}
1587
1588bool PhoneNumberUtil::IsNANPACountry(const string& region_code) const {
1589  return nanpa_regions_->find(region_code) != nanpa_regions_->end();
1590}
1591
1592// Returns the region codes that matches the specific country calling code. In
1593// the case of no region code being found, region_codes will be left empty.
1594void PhoneNumberUtil::GetRegionCodesForCountryCallingCode(
1595    int country_calling_code,
1596    list<string>* region_codes) const {
1597  DCHECK(region_codes);
1598  // Create a IntRegionsPair with the country_code passed in, and use it to
1599  // locate the pair with the same country_code in the sorted vector.
1600  IntRegionsPair target_pair;
1601  target_pair.first = country_calling_code;
1602  typedef vector<IntRegionsPair>::const_iterator ConstIterator;
1603  pair<ConstIterator, ConstIterator> range = equal_range(
1604      country_calling_code_to_region_code_map_->begin(),
1605      country_calling_code_to_region_code_map_->end(),
1606      target_pair, OrderByFirst());
1607  if (range.first != range.second) {
1608    region_codes->insert(region_codes->begin(),
1609                         range.first->second->begin(),
1610                         range.first->second->end());
1611  }
1612}
1613
1614// Returns the region code that matches the specific country calling code. In
1615// the case of no region code being found, the unknown region code will be
1616// returned.
1617void PhoneNumberUtil::GetRegionCodeForCountryCode(
1618    int country_calling_code,
1619    string* region_code) const {
1620  DCHECK(region_code);
1621  list<string> region_codes;
1622
1623  GetRegionCodesForCountryCallingCode(country_calling_code, &region_codes);
1624  *region_code = (region_codes.size() > 0) ?
1625      region_codes.front() : RegionCode::GetUnknown();
1626}
1627
1628void PhoneNumberUtil::GetRegionCodeForNumber(const PhoneNumber& number,
1629                                             string* region_code) const {
1630  DCHECK(region_code);
1631  int country_calling_code = number.country_code();
1632  list<string> region_codes;
1633  GetRegionCodesForCountryCallingCode(country_calling_code, &region_codes);
1634  if (region_codes.size() == 0) {
1635    string number_string;
1636    GetNationalSignificantNumber(number, &number_string);
1637    LOG(WARNING) << "Missing/invalid country calling code ("
1638                 << country_calling_code
1639                 << ") for number " << number_string;
1640    *region_code = RegionCode::GetUnknown();
1641    return;
1642  }
1643  if (region_codes.size() == 1) {
1644    *region_code = region_codes.front();
1645  } else {
1646    GetRegionCodeForNumberFromRegionList(number, region_codes, region_code);
1647  }
1648}
1649
1650void PhoneNumberUtil::GetRegionCodeForNumberFromRegionList(
1651    const PhoneNumber& number, const list<string>& region_codes,
1652    string* region_code) const {
1653  DCHECK(region_code);
1654  string national_number;
1655  GetNationalSignificantNumber(number, &national_number);
1656  for (list<string>::const_iterator it = region_codes.begin();
1657       it != region_codes.end(); ++it) {
1658    // Metadata cannot be NULL because the region codes come from the country
1659    // calling code map.
1660    const PhoneMetadata* metadata = GetMetadataForRegion(*it);
1661    if (metadata->has_leading_digits()) {
1662      const scoped_ptr<RegExpInput> number(
1663          reg_exps_->regexp_factory_->CreateInput(national_number));
1664      if (reg_exps_->regexp_cache_->
1665              GetRegExp(metadata->leading_digits()).Consume(number.get())) {
1666        *region_code = *it;
1667        return;
1668      }
1669    } else if (GetNumberTypeHelper(national_number, *metadata,
1670                                   reg_exps_->regexp_cache_.get()) != UNKNOWN) {
1671      *region_code = *it;
1672      return;
1673    }
1674  }
1675  *region_code = RegionCode::GetUnknown();
1676}
1677
1678int PhoneNumberUtil::GetCountryCodeForRegion(const string& region_code) const {
1679  if (!IsValidRegionCode(region_code)) {
1680    LOG(WARNING) << "Invalid or unknown region code (" << region_code
1681                 << ") provided.";
1682    return 0;
1683  }
1684  return GetCountryCodeForValidRegion(region_code);
1685}
1686
1687int PhoneNumberUtil::GetCountryCodeForValidRegion(
1688    const string& region_code) const {
1689  const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
1690  return metadata->country_code();
1691}
1692
1693// Gets a valid fixed-line number for the specified region_code. Returns false
1694// if the region was unknown or 001 (representing non-geographical regions), or
1695// if no number exists.
1696bool PhoneNumberUtil::GetExampleNumber(const string& region_code,
1697                                       PhoneNumber* number) const {
1698  DCHECK(number);
1699  return GetExampleNumberForType(region_code, FIXED_LINE, number);
1700}
1701
1702// Gets a valid number for the specified region_code and type.  Returns false if
1703// the country was unknown or 001 (representing non-geographical regions), or if
1704// no number exists.
1705bool PhoneNumberUtil::GetExampleNumberForType(
1706    const string& region_code,
1707    PhoneNumberUtil::PhoneNumberType type,
1708    PhoneNumber* number) const {
1709  DCHECK(number);
1710  if (!IsValidRegionCode(region_code)) {
1711    LOG(WARNING) << "Invalid or unknown region code (" << region_code
1712                 << ") provided.";
1713    return false;
1714  }
1715  const PhoneMetadata* region_metadata = GetMetadataForRegion(region_code);
1716  const PhoneNumberDesc* desc = GetNumberDescByType(*region_metadata, type);
1717  if (desc && desc->has_example_number()) {
1718    ErrorType success = Parse(desc->example_number(), region_code, number);
1719    if (success == NO_PARSING_ERROR) {
1720      return true;
1721    } else {
1722      LOG(ERROR) << "Error parsing example number ("
1723                 << static_cast<int>(success) << ")";
1724    }
1725  }
1726  return false;
1727}
1728
1729bool PhoneNumberUtil::GetExampleNumberForNonGeoEntity(
1730    int country_calling_code, PhoneNumber* number) const {
1731  DCHECK(number);
1732  const PhoneMetadata* metadata =
1733      GetMetadataForNonGeographicalRegion(country_calling_code);
1734  if (metadata) {
1735    const PhoneNumberDesc& desc = metadata->general_desc();
1736    if (desc.has_example_number()) {
1737      ErrorType success = Parse(StrCat(kPlusSign,
1738                                       SimpleItoa(country_calling_code),
1739                                       desc.example_number()),
1740                                RegionCode::ZZ(), number);
1741      if (success == NO_PARSING_ERROR) {
1742        return true;
1743      } else {
1744        LOG(ERROR) << "Error parsing example number ("
1745                   << static_cast<int>(success) << ")";
1746      }
1747    }
1748  } else {
1749    LOG(WARNING) << "Invalid or unknown country calling code provided: "
1750                 << country_calling_code;
1751  }
1752  return false;
1753}
1754
1755PhoneNumberUtil::ErrorType PhoneNumberUtil::Parse(const string& number_to_parse,
1756                                                  const string& default_region,
1757                                                  PhoneNumber* number) const {
1758  DCHECK(number);
1759  return ParseHelper(number_to_parse, default_region, false, true, number);
1760}
1761
1762PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseAndKeepRawInput(
1763    const string& number_to_parse,
1764    const string& default_region,
1765    PhoneNumber* number) const {
1766  DCHECK(number);
1767  return ParseHelper(number_to_parse, default_region, true, true, number);
1768}
1769
1770// Checks to see that the region code used is valid, or if it is not valid, that
1771// the number to parse starts with a + symbol so that we can attempt to infer
1772// the country from the number. Returns false if it cannot use the region
1773// provided and the region cannot be inferred.
1774bool PhoneNumberUtil::CheckRegionForParsing(
1775    const string& number_to_parse,
1776    const string& default_region) const {
1777  if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) {
1778    const scoped_ptr<RegExpInput> number(
1779        reg_exps_->regexp_factory_->CreateInput(number_to_parse));
1780    if (!reg_exps_->plus_chars_pattern_->Consume(number.get())) {
1781      return false;
1782    }
1783  }
1784  return true;
1785}
1786
1787// Converts number_to_parse to a form that we can parse and write it to
1788// national_number if it is written in RFC3966; otherwise extract a possible
1789// number out of it and write to national_number.
1790void PhoneNumberUtil::BuildNationalNumberForParsing(
1791    const string& number_to_parse, string* national_number) const {
1792  size_t index_of_phone_context = number_to_parse.find(kRfc3966PhoneContext);
1793  if (index_of_phone_context != string::npos) {
1794    int phone_context_start =
1795        index_of_phone_context + strlen(kRfc3966PhoneContext);
1796    // If the phone context contains a phone number prefix, we need to capture
1797    // it, whereas domains will be ignored.
1798    if (number_to_parse.at(phone_context_start) == kPlusSign[0]) {
1799      // Additional parameters might follow the phone context. If so, we will
1800      // remove them here because the parameters after phone context are not
1801      // important for parsing the phone number.
1802      size_t phone_context_end = number_to_parse.find(';', phone_context_start);
1803      if (phone_context_end != string::npos) {
1804        StrAppend(
1805            national_number, number_to_parse.substr(
1806                phone_context_start, phone_context_end - phone_context_start));
1807      } else {
1808        StrAppend(national_number, number_to_parse.substr(phone_context_start));
1809      }
1810    }
1811
1812    // Now append everything between the "tel:" prefix and the phone-context.
1813    // This should include the national number, an optional extension or
1814    // isdn-subaddress component.
1815    int end_of_rfc_prefix =
1816        number_to_parse.find(kRfc3966Prefix) + strlen(kRfc3966Prefix);
1817    StrAppend(
1818        national_number,
1819        number_to_parse.substr(end_of_rfc_prefix,
1820                               index_of_phone_context - end_of_rfc_prefix));
1821  } else {
1822    // Extract a possible number from the string passed in (this strips leading
1823    // characters that could not be the start of a phone number.)
1824    ExtractPossibleNumber(number_to_parse, national_number);
1825  }
1826
1827  // Delete the isdn-subaddress and everything after it if it is present. Note
1828  // extension won't appear at the same time with isdn-subaddress according to
1829  // paragraph 5.3 of the RFC3966 spec.
1830  size_t index_of_isdn = national_number->find(kRfc3966IsdnSubaddress);
1831  if (index_of_isdn != string::npos) {
1832    national_number->erase(index_of_isdn);
1833  }
1834  // If both phone context and isdn-subaddress are absent but other parameters
1835  // are present, the parameters are left in nationalNumber. This is because
1836  // we are concerned about deleting content from a potential number string
1837  // when there is no strong evidence that the number is actually written in
1838  // RFC3966.
1839}
1840
1841PhoneNumberUtil::ErrorType PhoneNumberUtil::ParseHelper(
1842    const string& number_to_parse,
1843    const string& default_region,
1844    bool keep_raw_input,
1845    bool check_region,
1846    PhoneNumber* phone_number) const {
1847  DCHECK(phone_number);
1848
1849  string national_number;
1850  BuildNationalNumberForParsing(number_to_parse, &national_number);
1851
1852  if (!IsViablePhoneNumber(national_number)) {
1853    VLOG(2) << "The string supplied did not seem to be a phone number.";
1854    return NOT_A_NUMBER;
1855  }
1856
1857  if (check_region &&
1858      !CheckRegionForParsing(national_number, default_region)) {
1859    VLOG(1) << "Missing or invalid default country.";
1860    return INVALID_COUNTRY_CODE_ERROR;
1861  }
1862  PhoneNumber temp_number;
1863  if (keep_raw_input) {
1864    temp_number.set_raw_input(number_to_parse);
1865  }
1866  // Attempt to parse extension first, since it doesn't require country-specific
1867  // data and we want to have the non-normalised number here.
1868  string extension;
1869  MaybeStripExtension(&national_number, &extension);
1870  if (!extension.empty()) {
1871    temp_number.set_extension(extension);
1872  }
1873  const PhoneMetadata* country_metadata = GetMetadataForRegion(default_region);
1874  // Check to see if the number is given in international format so we know
1875  // whether this number is from the default country or not.
1876  string normalized_national_number(national_number);
1877  ErrorType country_code_error =
1878      MaybeExtractCountryCode(country_metadata, keep_raw_input,
1879                              &normalized_national_number, &temp_number);
1880  if (country_code_error != NO_PARSING_ERROR) {
1881     const scoped_ptr<RegExpInput> number_string_piece(
1882        reg_exps_->regexp_factory_->CreateInput(national_number));
1883    if ((country_code_error == INVALID_COUNTRY_CODE_ERROR) &&
1884        (reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get()))) {
1885      normalized_national_number.assign(number_string_piece->ToString());
1886      // Strip the plus-char, and try again.
1887      MaybeExtractCountryCode(country_metadata,
1888                              keep_raw_input,
1889                              &normalized_national_number,
1890                              &temp_number);
1891      if (temp_number.country_code() == 0) {
1892        return INVALID_COUNTRY_CODE_ERROR;
1893      }
1894    } else {
1895      return country_code_error;
1896    }
1897  }
1898  int country_code = temp_number.country_code();
1899  if (country_code != 0) {
1900    string phone_number_region;
1901    GetRegionCodeForCountryCode(country_code, &phone_number_region);
1902    if (phone_number_region != default_region) {
1903      country_metadata =
1904          GetMetadataForRegionOrCallingCode(country_code, phone_number_region);
1905    }
1906  } else if (country_metadata) {
1907    // If no extracted country calling code, use the region supplied instead.
1908    // Note that the national number was already normalized by
1909    // MaybeExtractCountryCode.
1910    country_code = country_metadata->country_code();
1911  }
1912  if (normalized_national_number.length() < kMinLengthForNsn) {
1913    VLOG(2) << "The string supplied is too short to be a phone number.";
1914    return TOO_SHORT_NSN;
1915  }
1916  if (country_metadata) {
1917    string* carrier_code = keep_raw_input ?
1918        temp_number.mutable_preferred_domestic_carrier_code() : NULL;
1919    MaybeStripNationalPrefixAndCarrierCode(*country_metadata,
1920                                           &normalized_national_number,
1921                                           carrier_code);
1922  }
1923  size_t normalized_national_number_length =
1924      normalized_national_number.length();
1925  if (normalized_national_number_length < kMinLengthForNsn) {
1926    VLOG(2) << "The string supplied is too short to be a phone number.";
1927    return TOO_SHORT_NSN;
1928  }
1929  if (normalized_national_number_length > kMaxLengthForNsn) {
1930    VLOG(2) << "The string supplied is too long to be a phone number.";
1931    return TOO_LONG_NSN;
1932  }
1933  temp_number.set_country_code(country_code);
1934  if (normalized_national_number[0] == '0') {
1935    temp_number.set_italian_leading_zero(true);
1936  }
1937  uint64 number_as_int;
1938  safe_strtou64(normalized_national_number, &number_as_int);
1939  temp_number.set_national_number(number_as_int);
1940  phone_number->MergeFrom(temp_number);
1941  return NO_PARSING_ERROR;
1942}
1943
1944// Attempts to extract a possible number from the string passed in. This
1945// currently strips all leading characters that could not be used to start a
1946// phone number. Characters that can be used to start a phone number are
1947// defined in the valid_start_char_pattern. If none of these characters are
1948// found in the number passed in, an empty string is returned. This function
1949// also attempts to strip off any alternative extensions or endings if two or
1950// more are present, such as in the case of: (530) 583-6985 x302/x2303. The
1951// second extension here makes this actually two phone numbers, (530) 583-6985
1952// x302 and (530) 583-6985 x2303. We remove the second extension so that the
1953// first number is parsed correctly.
1954void PhoneNumberUtil::ExtractPossibleNumber(const string& number,
1955                                            string* extracted_number) const {
1956  DCHECK(extracted_number);
1957
1958  UnicodeText number_as_unicode;
1959  number_as_unicode.PointToUTF8(number.data(), number.size());
1960  char current_char[5];
1961  int len;
1962  UnicodeText::const_iterator it;
1963  for (it = number_as_unicode.begin(); it != number_as_unicode.end(); ++it) {
1964    len = it.get_utf8(current_char);
1965    current_char[len] = '\0';
1966    if (reg_exps_->valid_start_char_pattern_->FullMatch(current_char)) {
1967      break;
1968    }
1969  }
1970
1971  if (it == number_as_unicode.end()) {
1972    // No valid start character was found. extracted_number should be set to
1973    // empty string.
1974    extracted_number->assign("");
1975    return;
1976  }
1977
1978  extracted_number->assign(
1979      UnicodeText::UTF8Substring(it, number_as_unicode.end()));
1980  TrimUnwantedEndChars(extracted_number);
1981  if (extracted_number->length() == 0) {
1982    return;
1983  }
1984
1985  VLOG(3) << "After stripping starting and trailing characters, left with: "
1986          << *extracted_number;
1987
1988  // Now remove any extra numbers at the end.
1989  reg_exps_->capture_up_to_second_number_start_pattern_->
1990      PartialMatch(*extracted_number, extracted_number);
1991}
1992
1993bool PhoneNumberUtil::IsPossibleNumber(const PhoneNumber& number) const {
1994  return IsPossibleNumberWithReason(number) == IS_POSSIBLE;
1995}
1996
1997bool PhoneNumberUtil::IsPossibleNumberForString(
1998    const string& number,
1999    const string& region_dialing_from) const {
2000  PhoneNumber number_proto;
2001  if (Parse(number, region_dialing_from, &number_proto) == NO_PARSING_ERROR) {
2002    return IsPossibleNumber(number_proto);
2003  } else {
2004    return false;
2005  }
2006}
2007
2008PhoneNumberUtil::ValidationResult PhoneNumberUtil::IsPossibleNumberWithReason(
2009    const PhoneNumber& number) const {
2010  string national_number;
2011  GetNationalSignificantNumber(number, &national_number);
2012  int country_code = number.country_code();
2013  // Note: For Russian Fed and NANPA numbers, we just use the rules from the
2014  // default region (US or Russia) since the GetRegionCodeForNumber will not
2015  // work if the number is possible but not valid. This would need to be
2016  // revisited if the possible number pattern ever differed between various
2017  // regions within those plans.
2018  if (!HasValidCountryCallingCode(country_code)) {
2019    return INVALID_COUNTRY_CODE;
2020  }
2021  string region_code;
2022  GetRegionCodeForCountryCode(country_code, &region_code);
2023  // Metadata cannot be NULL because the country calling code is valid.
2024  const PhoneMetadata* metadata =
2025      GetMetadataForRegionOrCallingCode(country_code, region_code);
2026  const PhoneNumberDesc& general_num_desc = metadata->general_desc();
2027  // Handling case of numbers with no metadata.
2028  if (!general_num_desc.has_national_number_pattern()) {
2029    size_t number_length = national_number.length();
2030    if (number_length < kMinLengthForNsn) {
2031      return TOO_SHORT;
2032    } else if (number_length > kMaxLengthForNsn) {
2033      return TOO_LONG;
2034    } else {
2035      return IS_POSSIBLE;
2036    }
2037  }
2038  const RegExp& possible_number_pattern = reg_exps_->regexp_cache_->GetRegExp(
2039      StrCat("(", general_num_desc.possible_number_pattern(), ")"));
2040  return TestNumberLengthAgainstPattern(possible_number_pattern,
2041                                        national_number);
2042}
2043
2044bool PhoneNumberUtil::TruncateTooLongNumber(PhoneNumber* number) const {
2045  if (IsValidNumber(*number)) {
2046    return true;
2047  }
2048  PhoneNumber number_copy(*number);
2049  uint64 national_number = number->national_number();
2050  do {
2051    national_number /= 10;
2052    number_copy.set_national_number(national_number);
2053    if (IsPossibleNumberWithReason(number_copy) == TOO_SHORT ||
2054        national_number == 0) {
2055      return false;
2056    }
2057  } while (!IsValidNumber(number_copy));
2058  number->set_national_number(national_number);
2059  return true;
2060}
2061
2062PhoneNumberUtil::PhoneNumberType PhoneNumberUtil::GetNumberType(
2063    const PhoneNumber& number) const {
2064  string region_code;
2065  GetRegionCodeForNumber(number, &region_code);
2066  const PhoneMetadata* metadata =
2067      GetMetadataForRegionOrCallingCode(number.country_code(), region_code);
2068  if (!metadata) {
2069    return UNKNOWN;
2070  }
2071  string national_significant_number;
2072  GetNationalSignificantNumber(number, &national_significant_number);
2073  return GetNumberTypeHelper(national_significant_number,
2074                             *metadata,
2075                             reg_exps_->regexp_cache_.get());
2076}
2077
2078bool PhoneNumberUtil::IsValidNumber(const PhoneNumber& number) const {
2079  string region_code;
2080  GetRegionCodeForNumber(number, &region_code);
2081  return IsValidNumberForRegion(number, region_code);
2082}
2083
2084bool PhoneNumberUtil::IsValidNumberForRegion(const PhoneNumber& number,
2085                                             const string& region_code) const {
2086  int country_code = number.country_code();
2087  const PhoneMetadata* metadata =
2088      GetMetadataForRegionOrCallingCode(country_code, region_code);
2089  if (!metadata ||
2090      ((kRegionCodeForNonGeoEntity != region_code) &&
2091       country_code != GetCountryCodeForValidRegion(region_code))) {
2092    // Either the region code was invalid, or the country calling code for this
2093    // number does not match that of the region code.
2094    return false;
2095  }
2096  const PhoneNumberDesc& general_desc = metadata->general_desc();
2097  string national_number;
2098  GetNationalSignificantNumber(number, &national_number);
2099
2100  // For regions where we don't have metadata for PhoneNumberDesc, we treat
2101  // any number passed in as a valid number if its national significant number
2102  // is between the minimum and maximum lengths defined by ITU for a national
2103  // significant number.
2104  if (!general_desc.has_national_number_pattern()) {
2105    VLOG(3) << "Validating number with incomplete metadata.";
2106    size_t number_length = national_number.length();
2107    return number_length > kMinLengthForNsn &&
2108        number_length <= kMaxLengthForNsn;
2109  }
2110  return GetNumberTypeHelper(national_number, *metadata,
2111                             reg_exps_->regexp_cache_.get()) != UNKNOWN;
2112}
2113
2114bool PhoneNumberUtil::IsNumberGeographical(
2115    const PhoneNumber& phone_number) const {
2116  PhoneNumberType number_type = GetNumberType(phone_number);
2117  // TODO: Include mobile phone numbers from countries like
2118  // Indonesia, which has some mobile numbers that are geographical.
2119  return number_type == PhoneNumberUtil::FIXED_LINE ||
2120      number_type == PhoneNumberUtil::FIXED_LINE_OR_MOBILE;
2121}
2122
2123bool PhoneNumberUtil::IsLeadingZeroPossible(int country_calling_code) const {
2124  string region_code;
2125  GetRegionCodeForCountryCode(country_calling_code, &region_code);
2126  const PhoneMetadata* main_metadata_for_calling_code =
2127      GetMetadataForRegionOrCallingCode(country_calling_code, region_code);
2128  if (!main_metadata_for_calling_code) return false;
2129  return main_metadata_for_calling_code->leading_zero_possible();
2130}
2131
2132void PhoneNumberUtil::GetNationalSignificantNumber(
2133    const PhoneNumber& number,
2134    string* national_number) const {
2135  DCHECK(national_number);
2136  // If a leading zero has been set, we prefix this now. Note this is not a
2137  // national prefix.
2138  StrAppend(national_number, number.italian_leading_zero() ? "0" : "");
2139  StrAppend(national_number, number.national_number());
2140}
2141
2142int PhoneNumberUtil::GetLengthOfGeographicalAreaCode(
2143    const PhoneNumber& number) const {
2144  string region_code;
2145  GetRegionCodeForNumber(number, &region_code);
2146  const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
2147  if (!metadata) {
2148    return 0;
2149  }
2150  // If a country doesn't use a national prefix, and this number doesn't have an
2151  // Italian leading zero, we assume it is a closed dialling plan with no area
2152  // codes.
2153  if (!metadata->has_national_prefix() && !number.italian_leading_zero()) {
2154    return 0;
2155  }
2156
2157  if (!IsNumberGeographical(number)) {
2158    return 0;
2159  }
2160
2161  return GetLengthOfNationalDestinationCode(number);
2162}
2163
2164int PhoneNumberUtil::GetLengthOfNationalDestinationCode(
2165    const PhoneNumber& number) const {
2166  PhoneNumber copied_proto(number);
2167  if (number.has_extension()) {
2168    // Clear the extension so it's not included when formatting.
2169    copied_proto.clear_extension();
2170  }
2171
2172  string formatted_number;
2173  Format(copied_proto, INTERNATIONAL, &formatted_number);
2174  const scoped_ptr<RegExpInput> i18n_number(
2175      reg_exps_->regexp_factory_->CreateInput(formatted_number));
2176  string digit_group;
2177  string ndc;
2178  string third_group;
2179  for (int i = 0; i < 3; ++i) {
2180    if (!reg_exps_->capturing_ascii_digits_pattern_->FindAndConsume(
2181            i18n_number.get(), &digit_group)) {
2182      // We should find at least three groups.
2183      return 0;
2184    }
2185    if (i == 1) {
2186      ndc = digit_group;
2187    } else if (i == 2) {
2188      third_group = digit_group;
2189    }
2190  }
2191
2192  if (GetNumberType(number) == MOBILE) {
2193    // For example Argentinian mobile numbers, when formatted in the
2194    // international format, are in the form of +54 9 NDC XXXX.... As a result,
2195    // we take the length of the third group (NDC) and add the length of the
2196    // mobile token, which also forms part of the national significant number.
2197    // This assumes that the mobile token is always formatted separately from
2198    // the rest of the phone number.
2199    string mobile_token;
2200    GetCountryMobileToken(number.country_code(), &mobile_token);
2201    if (!mobile_token.empty()) {
2202      return third_group.size() + mobile_token.size();
2203    }
2204  }
2205  return ndc.size();
2206}
2207
2208void PhoneNumberUtil::GetCountryMobileToken(int country_calling_code,
2209                                            string* mobile_token) const {
2210  DCHECK(mobile_token);
2211  map<int, char>::iterator it = reg_exps_->mobile_token_mappings_.find(
2212      country_calling_code);
2213  if (it != reg_exps_->mobile_token_mappings_.end()) {
2214    *mobile_token = it->second;
2215  } else {
2216    mobile_token->assign("");
2217  }
2218}
2219
2220void PhoneNumberUtil::NormalizeDigitsOnly(string* number) const {
2221  DCHECK(number);
2222  const RegExp& non_digits_pattern = reg_exps_->regexp_cache_->GetRegExp(
2223      StrCat("[^", kDigits, "]"));
2224  // Delete everything that isn't valid digits.
2225  non_digits_pattern.GlobalReplace(number, "");
2226  // Normalize all decimal digits to ASCII digits.
2227  number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number));
2228}
2229
2230void PhoneNumberUtil::NormalizeDiallableCharsOnly(string* number) const {
2231  DCHECK(number);
2232  NormalizeHelper(reg_exps_->diallable_char_mappings_,
2233                  true /* remove non matches */, number);
2234}
2235
2236bool PhoneNumberUtil::IsAlphaNumber(const string& number) const {
2237  if (!IsViablePhoneNumber(number)) {
2238    // Number is too short, or doesn't match the basic phone number pattern.
2239    return false;
2240  }
2241  // Copy the number, since we are going to try and strip the extension from it.
2242  string number_copy(number);
2243  string extension;
2244  MaybeStripExtension(&number_copy, &extension);
2245  return reg_exps_->valid_alpha_phone_pattern_->FullMatch(number_copy);
2246}
2247
2248void PhoneNumberUtil::ConvertAlphaCharactersInNumber(string* number) const {
2249  DCHECK(number);
2250  NormalizeHelper(reg_exps_->alpha_phone_mappings_, false, number);
2251}
2252
2253// Normalizes a string of characters representing a phone number. This performs
2254// the following conversions:
2255//   - Punctuation is stripped.
2256//   For ALPHA/VANITY numbers:
2257//   - Letters are converted to their numeric representation on a telephone
2258//     keypad. The keypad used here is the one defined in ITU Recommendation
2259//     E.161. This is only done if there are 3 or more letters in the number, to
2260//     lessen the risk that such letters are typos.
2261//   For other numbers:
2262//   - Wide-ascii digits are converted to normal ASCII (European) digits.
2263//   - Arabic-Indic numerals are converted to European numerals.
2264//   - Spurious alpha characters are stripped.
2265void PhoneNumberUtil::Normalize(string* number) const {
2266  DCHECK(number);
2267  if (reg_exps_->valid_alpha_phone_pattern_->PartialMatch(*number)) {
2268    NormalizeHelper(reg_exps_->alpha_phone_mappings_, true, number);
2269  }
2270  NormalizeDigitsOnly(number);
2271}
2272
2273// Checks to see if the string of characters could possibly be a phone number at
2274// all. At the moment, checks to see that the string begins with at least 3
2275// digits, ignoring any punctuation commonly found in phone numbers.  This
2276// method does not require the number to be normalized in advance - but does
2277// assume that leading non-number symbols have been removed, such as by the
2278// method ExtractPossibleNumber.
2279bool PhoneNumberUtil::IsViablePhoneNumber(const string& number) const {
2280  if (number.length() < kMinLengthForNsn) {
2281    VLOG(2) << "Number too short to be viable:" << number;
2282    return false;
2283  }
2284  return reg_exps_->valid_phone_number_pattern_->FullMatch(number);
2285}
2286
2287// Strips the IDD from the start of the number if present. Helper function used
2288// by MaybeStripInternationalPrefixAndNormalize.
2289bool PhoneNumberUtil::ParsePrefixAsIdd(const RegExp& idd_pattern,
2290                                       string* number) const {
2291  DCHECK(number);
2292  const scoped_ptr<RegExpInput> number_copy(
2293      reg_exps_->regexp_factory_->CreateInput(*number));
2294  // First attempt to strip the idd_pattern at the start, if present. We make a
2295  // copy so that we can revert to the original string if necessary.
2296  if (idd_pattern.Consume(number_copy.get())) {
2297    // Only strip this if the first digit after the match is not a 0, since
2298    // country calling codes cannot begin with 0.
2299    string extracted_digit;
2300    if (reg_exps_->capturing_digit_pattern_->PartialMatch(
2301            number_copy->ToString(), &extracted_digit)) {
2302      NormalizeDigitsOnly(&extracted_digit);
2303      if (extracted_digit == "0") {
2304        return false;
2305      }
2306    }
2307    number->assign(number_copy->ToString());
2308    return true;
2309  }
2310  return false;
2311}
2312
2313// Strips any international prefix (such as +, 00, 011) present in the number
2314// provided, normalizes the resulting number, and indicates if an international
2315// prefix was present.
2316//
2317// possible_idd_prefix represents the international direct dialing prefix from
2318// the region we think this number may be dialed in.
2319// Returns true if an international dialing prefix could be removed from the
2320// number, otherwise false if the number did not seem to be in international
2321// format.
2322PhoneNumber::CountryCodeSource
2323PhoneNumberUtil::MaybeStripInternationalPrefixAndNormalize(
2324    const string& possible_idd_prefix,
2325    string* number) const {
2326  DCHECK(number);
2327  if (number->empty()) {
2328    return PhoneNumber::FROM_DEFAULT_COUNTRY;
2329  }
2330  const scoped_ptr<RegExpInput> number_string_piece(
2331      reg_exps_->regexp_factory_->CreateInput(*number));
2332  if (reg_exps_->plus_chars_pattern_->Consume(number_string_piece.get())) {
2333    number->assign(number_string_piece->ToString());
2334    // Can now normalize the rest of the number since we've consumed the "+"
2335    // sign at the start.
2336    Normalize(number);
2337    return PhoneNumber::FROM_NUMBER_WITH_PLUS_SIGN;
2338  }
2339  // Attempt to parse the first digits as an international prefix.
2340  const RegExp& idd_pattern =
2341      reg_exps_->regexp_cache_->GetRegExp(possible_idd_prefix);
2342  Normalize(number);
2343  return ParsePrefixAsIdd(idd_pattern, number)
2344      ? PhoneNumber::FROM_NUMBER_WITH_IDD
2345      : PhoneNumber::FROM_DEFAULT_COUNTRY;
2346}
2347
2348// Strips any national prefix (such as 0, 1) present in the number provided.
2349// The number passed in should be the normalized telephone number that we wish
2350// to strip any national dialing prefix from. The metadata should be for the
2351// region that we think this number is from. Returns true if a national prefix
2352// and/or carrier code was stripped.
2353bool PhoneNumberUtil::MaybeStripNationalPrefixAndCarrierCode(
2354    const PhoneMetadata& metadata,
2355    string* number,
2356    string* carrier_code) const {
2357  DCHECK(number);
2358  string carrier_code_temp;
2359  const string& possible_national_prefix =
2360      metadata.national_prefix_for_parsing();
2361  if (number->empty() || possible_national_prefix.empty()) {
2362    // Early return for numbers of zero length or with no national prefix
2363    // possible.
2364    return false;
2365  }
2366  // We use two copies here since Consume modifies the phone number, and if the
2367  // first if-clause fails the number will already be changed.
2368  const scoped_ptr<RegExpInput> number_copy(
2369      reg_exps_->regexp_factory_->CreateInput(*number));
2370  const scoped_ptr<RegExpInput> number_copy_without_transform(
2371      reg_exps_->regexp_factory_->CreateInput(*number));
2372  string number_string_copy(*number);
2373  string captured_part_of_prefix;
2374  const RegExp& national_number_rule = reg_exps_->regexp_cache_->GetRegExp(
2375      metadata.general_desc().national_number_pattern());
2376  // Check if the original number is viable.
2377  bool is_viable_original_number = national_number_rule.FullMatch(*number);
2378  // Attempt to parse the first digits as a national prefix. We make a
2379  // copy so that we can revert to the original string if necessary.
2380  const string& transform_rule = metadata.national_prefix_transform_rule();
2381  const RegExp& possible_national_prefix_pattern =
2382      reg_exps_->regexp_cache_->GetRegExp(possible_national_prefix);
2383  if (!transform_rule.empty() &&
2384      (possible_national_prefix_pattern.Consume(
2385          number_copy.get(), &carrier_code_temp, &captured_part_of_prefix) ||
2386       possible_national_prefix_pattern.Consume(
2387           number_copy.get(), &captured_part_of_prefix)) &&
2388      !captured_part_of_prefix.empty()) {
2389    // If this succeeded, then we must have had a transform rule and there must
2390    // have been some part of the prefix that we captured.
2391    // We make the transformation and check that the resultant number is still
2392    // viable. If so, replace the number and return.
2393    possible_national_prefix_pattern.Replace(&number_string_copy,
2394                                             transform_rule);
2395    if (is_viable_original_number &&
2396        !national_number_rule.FullMatch(number_string_copy)) {
2397      return false;
2398    }
2399    number->assign(number_string_copy);
2400    if (carrier_code) {
2401      carrier_code->assign(carrier_code_temp);
2402    }
2403  } else if (possible_national_prefix_pattern.Consume(
2404                 number_copy_without_transform.get(), &carrier_code_temp) ||
2405             possible_national_prefix_pattern.Consume(
2406                 number_copy_without_transform.get())) {
2407    VLOG(4) << "Parsed the first digits as a national prefix.";
2408    // If captured_part_of_prefix is empty, this implies nothing was captured by
2409    // the capturing groups in possible_national_prefix; therefore, no
2410    // transformation is necessary, and we just remove the national prefix.
2411    const string number_copy_as_string =
2412        number_copy_without_transform->ToString();
2413    if (is_viable_original_number &&
2414        !national_number_rule.FullMatch(number_copy_as_string)) {
2415      return false;
2416    }
2417    number->assign(number_copy_as_string);
2418    if (carrier_code) {
2419      carrier_code->assign(carrier_code_temp);
2420    }
2421  } else {
2422    return false;
2423    VLOG(4) << "The first digits did not match the national prefix.";
2424  }
2425  return true;
2426}
2427
2428// Strips any extension (as in, the part of the number dialled after the call is
2429// connected, usually indicated with extn, ext, x or similar) from the end of
2430// the number, and returns it. The number passed in should be non-normalized.
2431bool PhoneNumberUtil::MaybeStripExtension(string* number, string* extension)
2432    const {
2433  DCHECK(number);
2434  DCHECK(extension);
2435  // There are three extension capturing groups in the regular expression.
2436  string possible_extension_one;
2437  string possible_extension_two;
2438  string possible_extension_three;
2439  string number_copy(*number);
2440  const scoped_ptr<RegExpInput> number_copy_as_regexp_input(
2441      reg_exps_->regexp_factory_->CreateInput(number_copy));
2442  if (reg_exps_->extn_pattern_->Consume(number_copy_as_regexp_input.get(),
2443                            false,
2444                            &possible_extension_one,
2445                            &possible_extension_two,
2446                            &possible_extension_three)) {
2447    // Replace the extensions in the original string here.
2448    reg_exps_->extn_pattern_->Replace(&number_copy, "");
2449    VLOG(4) << "Found an extension. Possible extension one: "
2450            << possible_extension_one
2451            << ". Possible extension two: " << possible_extension_two
2452            << ". Possible extension three: " << possible_extension_three
2453            << ". Remaining number: " << number_copy;
2454    // If we find a potential extension, and the number preceding this is a
2455    // viable number, we assume it is an extension.
2456    if ((!possible_extension_one.empty() || !possible_extension_two.empty() ||
2457         !possible_extension_three.empty()) &&
2458        IsViablePhoneNumber(number_copy)) {
2459      number->assign(number_copy);
2460      if (!possible_extension_one.empty()) {
2461        extension->assign(possible_extension_one);
2462      } else if (!possible_extension_two.empty()) {
2463        extension->assign(possible_extension_two);
2464      } else if (!possible_extension_three.empty()) {
2465        extension->assign(possible_extension_three);
2466      }
2467      return true;
2468    }
2469  }
2470  return false;
2471}
2472
2473// Extracts country calling code from national_number, and returns it. It
2474// assumes that the leading plus sign or IDD has already been removed. Returns 0
2475// if national_number doesn't start with a valid country calling code, and
2476// leaves national_number unmodified. Assumes the national_number is at least 3
2477// characters long.
2478int PhoneNumberUtil::ExtractCountryCode(string* national_number) const {
2479  int potential_country_code;
2480  if (national_number->empty() || (national_number->at(0) == '0')) {
2481    // Country codes do not begin with a '0'.
2482    return 0;
2483  }
2484  for (size_t i = 1; i <= kMaxLengthCountryCode; ++i) {
2485    safe_strto32(national_number->substr(0, i), &potential_country_code);
2486    string region_code;
2487    GetRegionCodeForCountryCode(potential_country_code, &region_code);
2488    if (region_code != RegionCode::GetUnknown()) {
2489      national_number->erase(0, i);
2490      return potential_country_code;
2491    }
2492  }
2493  return 0;
2494}
2495
2496// Tries to extract a country calling code from a number. Country calling codes
2497// are extracted in the following ways:
2498//   - by stripping the international dialing prefix of the region the person
2499//   is dialing from, if this is present in the number, and looking at the next
2500//   digits
2501//   - by stripping the '+' sign if present and then looking at the next digits
2502//   - by comparing the start of the number and the country calling code of the
2503//   default region. If the number is not considered possible for the numbering
2504//   plan of the default region initially, but starts with the country calling
2505//   code of this region, validation will be reattempted after stripping this
2506//   country calling code. If this number is considered a possible number, then
2507//   the first digits will be considered the country calling code and removed as
2508//   such.
2509//
2510//   Returns NO_PARSING_ERROR if a country calling code was successfully
2511//   extracted or none was present, or the appropriate error otherwise, such as
2512//   if a + was present but it was not followed by a valid country calling code.
2513//   If NO_PARSING_ERROR is returned, the national_number without the country
2514//   calling code is populated, and the country_code of the phone_number passed
2515//   in is set to the country calling code if found, otherwise to 0.
2516PhoneNumberUtil::ErrorType PhoneNumberUtil::MaybeExtractCountryCode(
2517    const PhoneMetadata* default_region_metadata,
2518    bool keep_raw_input,
2519    string* national_number,
2520    PhoneNumber* phone_number) const {
2521  DCHECK(national_number);
2522  DCHECK(phone_number);
2523  // Set the default prefix to be something that will never match if there is no
2524  // default region.
2525  string possible_country_idd_prefix = default_region_metadata
2526      ?  default_region_metadata->international_prefix()
2527      : "NonMatch";
2528  PhoneNumber::CountryCodeSource country_code_source =
2529      MaybeStripInternationalPrefixAndNormalize(possible_country_idd_prefix,
2530                                                national_number);
2531  if (keep_raw_input) {
2532    phone_number->set_country_code_source(country_code_source);
2533  }
2534  if (country_code_source != PhoneNumber::FROM_DEFAULT_COUNTRY) {
2535    if (national_number->length() <= kMinLengthForNsn) {
2536      VLOG(2) << "Phone number had an IDD, but after this was not "
2537              << "long enough to be a viable phone number.";
2538      return TOO_SHORT_AFTER_IDD;
2539    }
2540    int potential_country_code = ExtractCountryCode(national_number);
2541    if (potential_country_code != 0) {
2542      phone_number->set_country_code(potential_country_code);
2543      return NO_PARSING_ERROR;
2544    }
2545    // If this fails, they must be using a strange country calling code that we
2546    // don't recognize, or that doesn't exist.
2547    return INVALID_COUNTRY_CODE_ERROR;
2548  } else if (default_region_metadata) {
2549    // Check to see if the number starts with the country calling code for the
2550    // default region. If so, we remove the country calling code, and do some
2551    // checks on the validity of the number before and after.
2552    int default_country_code = default_region_metadata->country_code();
2553    string default_country_code_string(SimpleItoa(default_country_code));
2554    VLOG(4) << "Possible country calling code: " << default_country_code_string;
2555    string potential_national_number;
2556    if (TryStripPrefixString(*national_number,
2557                             default_country_code_string,
2558                             &potential_national_number)) {
2559      const PhoneNumberDesc& general_num_desc =
2560          default_region_metadata->general_desc();
2561      const RegExp& valid_number_pattern =
2562          reg_exps_->regexp_cache_->GetRegExp(
2563              general_num_desc.national_number_pattern());
2564      MaybeStripNationalPrefixAndCarrierCode(*default_region_metadata,
2565                                             &potential_national_number,
2566                                             NULL);
2567      VLOG(4) << "Number without country calling code prefix: "
2568              << potential_national_number;
2569      const RegExp& possible_number_pattern =
2570          reg_exps_->regexp_cache_->GetRegExp(
2571              StrCat("(", general_num_desc.possible_number_pattern(), ")"));
2572      // If the number was not valid before but is valid now, or if it was too
2573      // long before, we consider the number with the country code stripped to
2574      // be a better result and keep that instead.
2575      if ((!valid_number_pattern.FullMatch(*national_number) &&
2576           valid_number_pattern.FullMatch(potential_national_number)) ||
2577           TestNumberLengthAgainstPattern(possible_number_pattern,
2578                                          *national_number) == TOO_LONG) {
2579        national_number->assign(potential_national_number);
2580        if (keep_raw_input) {
2581          phone_number->set_country_code_source(
2582              PhoneNumber::FROM_NUMBER_WITHOUT_PLUS_SIGN);
2583        }
2584        phone_number->set_country_code(default_country_code);
2585        return NO_PARSING_ERROR;
2586      }
2587    }
2588  }
2589  // No country calling code present. Set the country_code to 0.
2590  phone_number->set_country_code(0);
2591  return NO_PARSING_ERROR;
2592}
2593
2594PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatch(
2595    const PhoneNumber& first_number_in,
2596    const PhoneNumber& second_number_in) const {
2597  // Make copies of the phone number so that the numbers passed in are not
2598  // edited.
2599  PhoneNumber first_number(first_number_in);
2600  PhoneNumber second_number(second_number_in);
2601  // First clear raw_input and country_code_source and
2602  // preferred_domestic_carrier_code fields and any empty-string extensions so
2603  // that we can use the proto-buffer equality method.
2604  first_number.clear_raw_input();
2605  first_number.clear_country_code_source();
2606  first_number.clear_preferred_domestic_carrier_code();
2607  second_number.clear_raw_input();
2608  second_number.clear_country_code_source();
2609  second_number.clear_preferred_domestic_carrier_code();
2610  if (first_number.extension().empty()) {
2611    first_number.clear_extension();
2612  }
2613  if (second_number.extension().empty()) {
2614    second_number.clear_extension();
2615  }
2616  // Early exit if both had extensions and these are different.
2617  if (first_number.has_extension() && second_number.has_extension() &&
2618      first_number.extension() != second_number.extension()) {
2619    return NO_MATCH;
2620  }
2621  int first_number_country_code = first_number.country_code();
2622  int second_number_country_code = second_number.country_code();
2623  // Both had country calling code specified.
2624  if (first_number_country_code != 0 && second_number_country_code != 0) {
2625    if (ExactlySameAs(first_number, second_number)) {
2626      return EXACT_MATCH;
2627    } else if (first_number_country_code == second_number_country_code &&
2628               IsNationalNumberSuffixOfTheOther(first_number, second_number)) {
2629      // A SHORT_NSN_MATCH occurs if there is a difference because of the
2630      // presence or absence of an 'Italian leading zero', the presence or
2631      // absence of an extension, or one NSN being a shorter variant of the
2632      // other.
2633      return SHORT_NSN_MATCH;
2634    }
2635    // This is not a match.
2636    return NO_MATCH;
2637  }
2638  // Checks cases where one or both country calling codes were not specified. To
2639  // make equality checks easier, we first set the country_code fields to be
2640  // equal.
2641  first_number.set_country_code(second_number_country_code);
2642  // If all else was the same, then this is an NSN_MATCH.
2643  if (ExactlySameAs(first_number, second_number)) {
2644    return NSN_MATCH;
2645  }
2646  if (IsNationalNumberSuffixOfTheOther(first_number, second_number)) {
2647    return SHORT_NSN_MATCH;
2648  }
2649  return NO_MATCH;
2650}
2651
2652PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatchWithTwoStrings(
2653    const string& first_number,
2654    const string& second_number) const {
2655  PhoneNumber first_number_as_proto;
2656  ErrorType error_type =
2657      Parse(first_number, RegionCode::GetUnknown(), &first_number_as_proto);
2658  if (error_type == NO_PARSING_ERROR) {
2659    return IsNumberMatchWithOneString(first_number_as_proto, second_number);
2660  }
2661  if (error_type == INVALID_COUNTRY_CODE_ERROR) {
2662    PhoneNumber second_number_as_proto;
2663    ErrorType error_type = Parse(second_number, RegionCode::GetUnknown(),
2664                                 &second_number_as_proto);
2665    if (error_type == NO_PARSING_ERROR) {
2666      return IsNumberMatchWithOneString(second_number_as_proto, first_number);
2667    }
2668    if (error_type == INVALID_COUNTRY_CODE_ERROR) {
2669      error_type  = ParseHelper(first_number, RegionCode::GetUnknown(), false,
2670                                false, &first_number_as_proto);
2671      if (error_type == NO_PARSING_ERROR) {
2672        error_type = ParseHelper(second_number, RegionCode::GetUnknown(), false,
2673                                 false, &second_number_as_proto);
2674        if (error_type == NO_PARSING_ERROR) {
2675          return IsNumberMatch(first_number_as_proto, second_number_as_proto);
2676        }
2677      }
2678    }
2679  }
2680  // One or more of the phone numbers we are trying to match is not a viable
2681  // phone number.
2682  return INVALID_NUMBER;
2683}
2684
2685PhoneNumberUtil::MatchType PhoneNumberUtil::IsNumberMatchWithOneString(
2686    const PhoneNumber& first_number,
2687    const string& second_number) const {
2688  // First see if the second number has an implicit country calling code, by
2689  // attempting to parse it.
2690  PhoneNumber second_number_as_proto;
2691  ErrorType error_type =
2692      Parse(second_number, RegionCode::GetUnknown(), &second_number_as_proto);
2693  if (error_type == NO_PARSING_ERROR) {
2694    return IsNumberMatch(first_number, second_number_as_proto);
2695  }
2696  if (error_type == INVALID_COUNTRY_CODE_ERROR) {
2697    // The second number has no country calling code. EXACT_MATCH is no longer
2698    // possible.  We parse it as if the region was the same as that for the
2699    // first number, and if EXACT_MATCH is returned, we replace this with
2700    // NSN_MATCH.
2701    string first_number_region;
2702    GetRegionCodeForCountryCode(first_number.country_code(),
2703                                &first_number_region);
2704    if (first_number_region != RegionCode::GetUnknown()) {
2705      PhoneNumber second_number_with_first_number_region;
2706      Parse(second_number, first_number_region,
2707            &second_number_with_first_number_region);
2708      MatchType match = IsNumberMatch(first_number,
2709                                      second_number_with_first_number_region);
2710      if (match == EXACT_MATCH) {
2711        return NSN_MATCH;
2712      }
2713      return match;
2714    } else {
2715      // If the first number didn't have a valid country calling code, then we
2716      // parse the second number without one as well.
2717      error_type = ParseHelper(second_number, RegionCode::GetUnknown(), false,
2718                               false, &second_number_as_proto);
2719      if (error_type == NO_PARSING_ERROR) {
2720        return IsNumberMatch(first_number, second_number_as_proto);
2721      }
2722    }
2723  }
2724  // One or more of the phone numbers we are trying to match is not a viable
2725  // phone number.
2726  return INVALID_NUMBER;
2727}
2728
2729AsYouTypeFormatter* PhoneNumberUtil::GetAsYouTypeFormatter(
2730    const string& region_code) const {
2731  return new AsYouTypeFormatter(region_code);
2732}
2733
2734bool PhoneNumberUtil::CanBeInternationallyDialled(
2735    const PhoneNumber& number) const {
2736  string region_code;
2737  GetRegionCodeForNumber(number, &region_code);
2738  const PhoneMetadata* metadata = GetMetadataForRegion(region_code);
2739  if (!metadata) {
2740    // Note numbers belonging to non-geographical entities (e.g. +800 numbers)
2741    // are always internationally diallable, and will be caught here.
2742    return true;
2743  }
2744  string national_significant_number;
2745  GetNationalSignificantNumber(number, &national_significant_number);
2746  return !IsNumberMatchingDesc(
2747      national_significant_number, metadata->no_international_dialling(),
2748      reg_exps_->regexp_cache_.get());
2749}
2750
2751}  // namespace phonenumbers
2752}  // namespace i18n
2753