// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Lara Rennie
// Author: Tao Huang
//
// This is a direct port from PhoneNumberMatcher.java.
// Changes to this class should also happen to the Java version, whenever it
// makes sense.
21
22#ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
23#define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
24
25#include <string>
26#include <vector>
27
28#include "phonenumbers/base/basictypes.h"
29#include "phonenumbers/base/memory/scoped_ptr.h"
30#include "phonenumbers/callback.h"
31#include "phonenumbers/regexp_adapter.h"
32
33namespace i18n {
34namespace phonenumbers {
35
// Forward declaration of the four-argument callback template; only a pointer
// to a specialization is needed in this header (see
// PhoneNumberMatcher::CheckNumberGroupingIsValid()).
template <class R, class A1, class A2, class A3, class A4>
    class ResultCallback4;

// Standard-library names used unqualified by the declarations below.
using std::string;
using std::vector;

// Forward declarations of types that this header only refers to through
// pointers, references or scoped_ptr, so their full definitions are not
// required here.
class AlternateFormats;
class NumberFormat;
class PhoneNumber;
class PhoneNumberMatch;
class PhoneNumberMatcherRegExps;
class PhoneNumberUtil;
48
49class PhoneNumberMatcher {
50  friend class PhoneNumberMatcherTest;
51 public:
52  // Leniency when finding potential phone numbers in text segments. The levels
53  // here are ordered in increasing strictness.
54  enum Leniency {
55    // Phone numbers accepted are possible, but not necessarily valid.
56    POSSIBLE,
57    // Phone numbers accepted are possible and valid.
58    VALID,
59    // Phone numbers accepted are valid and are grouped in a possible way for
60    // this locale. For example, a US number written as "65 02 53 00 00" is not
61    // accepted at this leniency level, whereas "650 253 0000" or "6502530000"
62    // are. Numbers with more than one '/' symbol are also dropped at this
63    // level.
64    // Warning: The next two levels might result in lower coverage especially
65    // for regions outside of country code "+1". If you are not sure about which
66    // level to use, you can send an e-mail to the discussion group
67    // http://groups.google.com/group/libphonenumber-discuss/
68    STRICT_GROUPING,
69    // Phone numbers accepted are valid and are grouped in the same way that we
70    // would have formatted it, or as a single block. For example, a US number
71    // written as "650 2530000" is not accepted at this leniency level, whereas
72    // "650 253 0000" or "6502530000" are.
73    EXACT_GROUPING,
74  };
75
76  // Constructs a phone number matcher.
77  PhoneNumberMatcher(const PhoneNumberUtil& util,
78                     const string& text,
79                     const string& region_code,
80                     Leniency leniency,
81                     int max_tries);
82
83  // Wrapper to construct a phone number matcher, with no limitation on the
84  // number of retries and VALID Leniency.
85  PhoneNumberMatcher(const string& text,
86                     const string& region_code);
87
88  ~PhoneNumberMatcher();
89
90  // Returns true if the text sequence has another match.
91  bool HasNext();
92
93  // Gets next match from text sequence.
94  bool Next(PhoneNumberMatch* match);
95
96 private:
97  // The potential states of a PhoneNumberMatcher.
98  enum State {
99    NOT_READY,
100    READY,
101    DONE,
102  };
103
104  // Attempts to extract a match from a candidate string. Returns true if a
105  // match is found, otherwise returns false. The value "offset" refers to the
106  // start index of the candidate string within the overall text.
107  bool Find(int index, PhoneNumberMatch* match);
108
109  // Checks a number was formatted with a national prefix, if the number was
110  // found in national format, and a national prefix is required for that
111  // number. Returns false if the number needed to have a national prefix and
112  // none was found.
113  bool IsNationalPrefixPresentIfRequired(const PhoneNumber& number) const;
114
115  // Attempts to extract a match from candidate. Returns true if the match was
116  // found, otherwise returns false.
117  bool ExtractMatch(const string& candidate, int offset,
118                    PhoneNumberMatch* match);
119
120  // Attempts to extract a match from a candidate string if the whole candidate
121  // does not qualify as a match. Returns true if a match is found, otherwise
122  // returns false.
123  bool ExtractInnerMatch(const string& candidate, int offset,
124                         PhoneNumberMatch* match);
125
126  // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and
127  // verifies it matches the requested leniency. If parsing and verification
128  // succeed, returns true, otherwise this method returns false;
129  bool ParseAndVerify(const string& candidate, int offset,
130                      PhoneNumberMatch* match);
131
132  bool CheckNumberGroupingIsValid(
133    const PhoneNumber& phone_number,
134    const string& candidate,
135    ResultCallback4<bool, const PhoneNumberUtil&, const PhoneNumber&,
136                    const string&, const vector<string>&>* checker) const;
137
138  void GetNationalNumberGroups(
139      const PhoneNumber& number,
140      const NumberFormat* formatting_pattern,
141      vector<string>* digit_blocks) const;
142
143  bool AllNumberGroupsAreExactlyPresent(
144      const PhoneNumberUtil& util,
145      const PhoneNumber& phone_number,
146      const string& normalized_candidate,
147      const vector<string>& formatted_number_groups) const;
148
149  bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number,
150                                 const string& candidate) const;
151
152  // Helper method to determine if a character is a Latin-script letter or not.
153  // For our purposes, combining marks should also return true since we assume
154  // they have been added to a preceding Latin character.
155  static bool IsLatinLetter(char32 letter);
156
157  // Helper class holding useful regular expressions.
158  const PhoneNumberMatcherRegExps* reg_exps_;
159
160  // Helper class holding loaded data containing alternate ways phone numbers
161  // might be formatted for certain regions.
162  const AlternateFormats* alternate_formats_;
163
164  // The phone number utility;
165  const PhoneNumberUtil& phone_util_;
166
167  // The text searched for phone numbers;
168  const string text_;
169
170  // The region(country) to assume for phone numbers without an international
171  // prefix.
172  const string preferred_region_;
173
174  // The degree of validation requested.
175  Leniency leniency_;
176
177  // The maximum number of retries after matching an invalid number.
178  int max_tries_;
179
180  // The iteration tristate.
181  State state_;
182
183  // The last successful match, NULL unless in State.READY.
184  scoped_ptr<PhoneNumberMatch> last_match_;
185
186  // The next index to start searching at. Undefined in State.DONE.
187  int search_index_;
188
189  DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher);
190};
191
192}  // namespace phonenumbers
193}  // namespace i18n
194
195#endif  // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
196