1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/core/browser/phone_field.h"
6
7#include "base/logging.h"
8#include "base/memory/scoped_ptr.h"
9#include "base/strings/string16.h"
10#include "base/strings/string_util.h"
11#include "base/strings/utf_string_conversions.h"
12#include "components/autofill/core/browser/autofill_field.h"
13#include "components/autofill/core/browser/autofill_regex_constants.h"
14#include "components/autofill/core/browser/autofill_scanner.h"
15#include "ui/base/l10n/l10n_util.h"
16
17namespace autofill {
18namespace {
19
20// This string includes all area code separators, including NoText.
21base::string16 GetAreaRegex() {
22  base::string16 area_code = base::UTF8ToUTF16(autofill::kAreaCodeRe);
23  area_code.append(base::ASCIIToUTF16("|"));  // Regexp separator.
24  area_code.append(base::UTF8ToUTF16(autofill::kAreaCodeNotextRe));
25  return area_code;
26}
27
28}  // namespace
29
// Intentionally empty: nothing is released here. In particular the
// AutofillField pointers stored in |parsed_phone_fields_| are not deleted.
PhoneField::~PhoneField() {}
31
// Phone field grammars - first matched grammar will be parsed. Grammars are
// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
// parsed separately unless they are necessary parts of the match.
//
// Each entry is { regex id, phone part the matched field is stored under,
// max_size }. Parse() walks the table in order and tries the entries of one
// grammar against consecutive form fields; a grammar matches only if every
// one of its entries matches. A non-zero max_size additionally requires the
// matched field to declare a non-zero max_length that does not exceed
// max_size; 0 means no size restriction.
//
// The table must end with a separator entry: Parse() treats reaching the
// final separator after a failed grammar as "no grammar matched".
//
// The following notation is used to describe the patterns:
// <cc> - country code field.
// <ac> - area code field.
// <phone> - phone or prefix.
// <suffix> - suffix.
// <ext> - extension.
// :N means field is limited to N characters, otherwise it is unlimited.
// (pattern <field>)? means pattern is optional and matched separately.
const PhoneField::Parser PhoneField::kPhoneFieldGrammars[] = {
  // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
  // (Ext: <ext>)?)?
  { REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0 },
  { REGEX_AREA, FIELD_AREA_CODE, 0 },
  { REGEX_PHONE, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
  { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
  { REGEX_PHONE, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_PHONE, FIELD_AREA_CODE, 3 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
  { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 },
  { REGEX_PHONE, FIELD_AREA_CODE, 3 },
  { REGEX_PHONE, FIELD_PHONE, 3 },
  { REGEX_PHONE, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
  { REGEX_AREA, FIELD_AREA_CODE, 0 },
  { REGEX_PHONE, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_AREA_CODE, 0 },
  { REGEX_PHONE, FIELD_PHONE, 3 },
  { REGEX_PHONE, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
  // NOTE(review): the entries below are identical to the previous grammar
  // and still start with a country-code entry, although the pattern above
  // has no <cc>. Because the first matching grammar wins, this grammar can
  // never be the one selected. Presumably the country-code entry was meant
  // to be dropped or replaced - confirm the intended entries before editing.
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
  { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
  { REGEX_PHONE, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX, FIELD_PHONE, 0 },
  { REGEX_SUFFIX, FIELD_SUFFIX, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_AREA_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
  { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
  { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 },
  { REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <ac> - <phone> (Ext: <ext>)?
  // NOTE(review): these entries are identical to the earlier
  // "Area Code: <ac> Phone: <phone>" grammar, so this grammar can never be
  // the first match. The pattern above suggests different regexes were
  // intended - confirm before editing.
  { REGEX_AREA, FIELD_AREA_CODE, 0 },
  { REGEX_PHONE, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
  { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 },
  { REGEX_PHONE, FIELD_PHONE, 10 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
  // Phone: <phone> (Ext: <ext>)?
  { REGEX_PHONE, FIELD_PHONE, 0 },
  { REGEX_SEPARATOR, FIELD_NONE, 0 },
};
119
120// static
121FormField* PhoneField::Parse(AutofillScanner* scanner) {
122  if (scanner->IsEnd())
123    return NULL;
124
125  scanner->SaveCursor();
126
127  // The form owns the following variables, so they should not be deleted.
128  AutofillField* parsed_fields[FIELD_MAX];
129
130  for (size_t i = 0; i < arraysize(kPhoneFieldGrammars); ++i) {
131    memset(parsed_fields, 0, sizeof(parsed_fields));
132    scanner->SaveCursor();
133
134    // Attempt to parse according to the next grammar.
135    for (; i < arraysize(kPhoneFieldGrammars) &&
136         kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR; ++i) {
137      if (!ParseFieldSpecifics(
138              scanner,
139              GetRegExp(kPhoneFieldGrammars[i].regex),
140              MATCH_DEFAULT | MATCH_TELEPHONE,
141              &parsed_fields[kPhoneFieldGrammars[i].phone_part]))
142        break;
143      if (kPhoneFieldGrammars[i].max_size &&
144          (!parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length ||
145            kPhoneFieldGrammars[i].max_size <
146            parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length)) {
147        break;
148      }
149    }
150
151    if (i >= arraysize(kPhoneFieldGrammars)) {
152      scanner->Rewind();
153      return NULL;  // Parsing failed.
154    }
155    if (kPhoneFieldGrammars[i].regex == REGEX_SEPARATOR)
156      break;  // Parsing succeeded.
157
158    // Proceed to the next grammar.
159    do {
160      ++i;
161    } while (i < arraysize(kPhoneFieldGrammars) &&
162             kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR);
163
164    if (i + 1 == arraysize(kPhoneFieldGrammars)) {
165      scanner->Rewind();
166      return NULL;  // Tried through all the possibilities - did not match.
167    }
168
169    scanner->Rewind();
170  }
171
172  if (!parsed_fields[FIELD_PHONE]) {
173    scanner->Rewind();
174    return NULL;
175  }
176
177  scoped_ptr<PhoneField> phone_field(new PhoneField);
178  for (int i = 0; i < FIELD_MAX; ++i)
179    phone_field->parsed_phone_fields_[i] = parsed_fields[i];
180
181  // Look for optional fields.
182
183  // Look for a third text box.
184  if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) {
185    if (!ParseField(scanner, base::UTF8ToUTF16(autofill::kPhoneSuffixRe),
186                    &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) {
187      ParseField(scanner, base::UTF8ToUTF16(autofill::kPhoneSuffixSeparatorRe),
188                 &phone_field->parsed_phone_fields_[FIELD_SUFFIX]);
189    }
190  }
191
192  // Now look for an extension.
193  ParseField(scanner, base::UTF8ToUTF16(autofill::kPhoneExtensionRe),
194             &phone_field->parsed_phone_fields_[FIELD_EXTENSION]);
195
196  return phone_field.release();
197}
198
199bool PhoneField::ClassifyField(ServerFieldTypeMap* map) const {
200  bool ok = true;
201
202  DCHECK(parsed_phone_fields_[FIELD_PHONE]);  // Phone was correctly parsed.
203
204  if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) ||
205      (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) ||
206      (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) {
207    if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
208      ok = ok && AddClassification(parsed_phone_fields_[FIELD_COUNTRY_CODE],
209                                   PHONE_HOME_COUNTRY_CODE,
210                                   map);
211    }
212
213    ServerFieldType field_number_type = PHONE_HOME_NUMBER;
214    if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) {
215      ok = ok && AddClassification(parsed_phone_fields_[FIELD_AREA_CODE],
216                                   PHONE_HOME_CITY_CODE,
217                                   map);
218    } else if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
219      // Only if we can find country code without city code, it means the phone
220      // number include city code.
221      field_number_type = PHONE_HOME_CITY_AND_NUMBER;
222    }
223    // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
224    // we fill only the prefix depending on the size of the input field.
225    ok = ok && AddClassification(parsed_phone_fields_[FIELD_PHONE],
226                                 field_number_type,
227                                 map);
228    // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
229    // we fill only the suffix depending on the size of the input field.
230    if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) {
231      ok = ok && AddClassification(parsed_phone_fields_[FIELD_SUFFIX],
232                                   PHONE_HOME_NUMBER,
233                                   map);
234    }
235  } else {
236    ok = AddClassification(parsed_phone_fields_[FIELD_PHONE],
237                           PHONE_HOME_WHOLE_NUMBER,
238                           map);
239  }
240
241  return ok;
242}
243
244PhoneField::PhoneField() {
245  memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_));
246}
247
248// static
249base::string16 PhoneField::GetRegExp(RegexType regex_id) {
250  switch (regex_id) {
251    case REGEX_COUNTRY:
252      return base::UTF8ToUTF16(autofill::kCountryCodeRe);
253    case REGEX_AREA:
254      return GetAreaRegex();
255    case REGEX_AREA_NOTEXT:
256      return base::UTF8ToUTF16(autofill::kAreaCodeNotextRe);
257    case REGEX_PHONE:
258      return base::UTF8ToUTF16(autofill::kPhoneRe);
259    case REGEX_PREFIX_SEPARATOR:
260      return base::UTF8ToUTF16(autofill::kPhonePrefixSeparatorRe);
261    case REGEX_PREFIX:
262      return base::UTF8ToUTF16(autofill::kPhonePrefixRe);
263    case REGEX_SUFFIX_SEPARATOR:
264      return base::UTF8ToUTF16(autofill::kPhoneSuffixSeparatorRe);
265    case REGEX_SUFFIX:
266      return base::UTF8ToUTF16(autofill::kPhoneSuffixRe);
267    case REGEX_EXTENSION:
268      return base::UTF8ToUTF16(autofill::kPhoneExtensionRe);
269    default:
270      NOTREACHED();
271      break;
272  }
273  return base::string16();
274}
275
276}  // namespace autofill
277