phone_field.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/autofill/core/browser/phone_field.h" 6 7#include "base/logging.h" 8#include "base/memory/scoped_ptr.h" 9#include "base/strings/string16.h" 10#include "base/strings/string_util.h" 11#include "base/strings/utf_string_conversions.h" 12#include "components/autofill/core/browser/autofill_field.h" 13#include "components/autofill/core/browser/autofill_regex_constants.h" 14#include "components/autofill/core/browser/autofill_scanner.h" 15#include "ui/base/l10n/l10n_util.h" 16 17namespace autofill { 18namespace { 19 20// This string includes all area code separators, including NoText. 21base::string16 GetAreaRegex() { 22 base::string16 area_code = base::UTF8ToUTF16(autofill::kAreaCodeRe); 23 area_code.append(base::ASCIIToUTF16("|")); // Regexp separator. 24 area_code.append(base::UTF8ToUTF16(autofill::kAreaCodeNotextRe)); 25 return area_code; 26} 27 28} // namespace 29 30PhoneField::~PhoneField() {} 31 32// Phone field grammars - first matched grammar will be parsed. Grammars are 33// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are 34// parsed separately unless they are necessary parts of the match. 35// The following notation is used to describe the patterns: 36// <cc> - country code field. 37// <ac> - area code field. 38// <phone> - phone or prefix. 39// <suffix> - suffix. 40// <ext> - extension. 41// :N means field is limited to N characters, otherwise it is unlimited. 42// (pattern <field>)? means pattern is optional and matched separately. 43const PhoneField::Parser PhoneField::kPhoneFieldGrammars[] = { 44 // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix> 45 // (Ext: <ext>)?)? 46 { REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0 }, 47 { REGEX_AREA, FIELD_AREA_CODE, 0 }, 48 { REGEX_PHONE, FIELD_PHONE, 0 }, 49 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 50 // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)? 51 { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3 }, 52 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 }, 53 { REGEX_PHONE, FIELD_SUFFIX, 4 }, 54 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 55 // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)? 56 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 }, 57 { REGEX_PHONE, FIELD_AREA_CODE, 3 }, 58 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 }, 59 { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 }, 60 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 61 // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)? 62 { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 }, 63 { REGEX_PHONE, FIELD_AREA_CODE, 3 }, 64 { REGEX_PHONE, FIELD_PHONE, 3 }, 65 { REGEX_PHONE, FIELD_SUFFIX, 4 }, 66 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 67 // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)? 68 { REGEX_AREA, FIELD_AREA_CODE, 0 }, 69 { REGEX_PHONE, FIELD_PHONE, 0 }, 70 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 71 // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)? 72 { REGEX_PHONE, FIELD_AREA_CODE, 0 }, 73 { REGEX_PHONE, FIELD_PHONE, 3 }, 74 { REGEX_PHONE, FIELD_SUFFIX, 4 }, 75 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 76 // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? 77 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 }, 78 { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 }, 79 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 }, 80 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 81 // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)? 82 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 }, 83 { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 }, 84 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 }, 85 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 86 // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)? 87 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 }, 88 { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 }, 89 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 }, 90 { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0 }, 91 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 92 // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)? 93 { REGEX_PHONE, FIELD_AREA_CODE, 0 }, 94 { REGEX_PREFIX, FIELD_PHONE, 0 }, 95 { REGEX_SUFFIX, FIELD_SUFFIX, 0 }, 96 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 97 // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)? 98 { REGEX_PHONE, FIELD_AREA_CODE, 0 }, 99 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 }, 100 { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 }, 101 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 102 // Phone: <cc> - <ac> - <phone> (Ext: <ext>)? 103 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 }, 104 { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 }, 105 { REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0 }, 106 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 107 // Phone: <ac> - <phone> (Ext: <ext>)? 108 { REGEX_AREA, FIELD_AREA_CODE, 0 }, 109 { REGEX_PHONE, FIELD_PHONE, 0 }, 110 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 111 // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)? 112 { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 }, 113 { REGEX_PHONE, FIELD_PHONE, 10 }, 114 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 115 // Phone: <phone> (Ext: <ext>)? 116 { REGEX_PHONE, FIELD_PHONE, 0 }, 117 { REGEX_SEPARATOR, FIELD_NONE, 0 }, 118}; 119 120// static 121FormField* PhoneField::Parse(AutofillScanner* scanner) { 122 if (scanner->IsEnd()) 123 return NULL; 124 125 scanner->SaveCursor(); 126 127 // The form owns the following variables, so they should not be deleted. 128 const AutofillField* parsed_fields[FIELD_MAX]; 129 130 for (size_t i = 0; i < arraysize(kPhoneFieldGrammars); ++i) { 131 memset(parsed_fields, 0, sizeof(parsed_fields)); 132 scanner->SaveCursor(); 133 134 // Attempt to parse according to the next grammar. 135 for (; i < arraysize(kPhoneFieldGrammars) && 136 kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR; ++i) { 137 if (!ParseFieldSpecifics( 138 scanner, 139 GetRegExp(kPhoneFieldGrammars[i].regex), 140 MATCH_DEFAULT | MATCH_TELEPHONE, 141 &parsed_fields[kPhoneFieldGrammars[i].phone_part])) 142 break; 143 if (kPhoneFieldGrammars[i].max_size && 144 (!parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length || 145 kPhoneFieldGrammars[i].max_size < 146 parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length)) { 147 break; 148 } 149 } 150 151 if (i >= arraysize(kPhoneFieldGrammars)) { 152 scanner->Rewind(); 153 return NULL; // Parsing failed. 154 } 155 if (kPhoneFieldGrammars[i].regex == REGEX_SEPARATOR) 156 break; // Parsing succeeded. 157 158 // Proceed to the next grammar. 159 do { 160 ++i; 161 } while (i < arraysize(kPhoneFieldGrammars) && 162 kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR); 163 164 if (i + 1 == arraysize(kPhoneFieldGrammars)) { 165 scanner->Rewind(); 166 return NULL; // Tried through all the possibilities - did not match. 167 } 168 169 scanner->Rewind(); 170 } 171 172 if (!parsed_fields[FIELD_PHONE]) { 173 scanner->Rewind(); 174 return NULL; 175 } 176 177 scoped_ptr<PhoneField> phone_field(new PhoneField); 178 for (int i = 0; i < FIELD_MAX; ++i) 179 phone_field->parsed_phone_fields_[i] = parsed_fields[i]; 180 181 // Look for optional fields. 182 183 // Look for a third text box. 184 if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) { 185 if (!ParseField(scanner, base::UTF8ToUTF16(autofill::kPhoneSuffixRe), 186 &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) { 187 ParseField(scanner, base::UTF8ToUTF16(autofill::kPhoneSuffixSeparatorRe), 188 &phone_field->parsed_phone_fields_[FIELD_SUFFIX]); 189 } 190 } 191 192 // Now look for an extension. 193 ParseField(scanner, base::UTF8ToUTF16(autofill::kPhoneExtensionRe), 194 &phone_field->parsed_phone_fields_[FIELD_EXTENSION]); 195 196 return phone_field.release(); 197} 198 199bool PhoneField::ClassifyField(ServerFieldTypeMap* map) const { 200 bool ok = true; 201 202 DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed. 203 204 if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) || 205 (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) || 206 (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) { 207 if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) { 208 ok = ok && AddClassification(parsed_phone_fields_[FIELD_COUNTRY_CODE], 209 PHONE_HOME_COUNTRY_CODE, 210 map); 211 } 212 213 ServerFieldType field_number_type = PHONE_HOME_NUMBER; 214 if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) { 215 ok = ok && AddClassification(parsed_phone_fields_[FIELD_AREA_CODE], 216 PHONE_HOME_CITY_CODE, 217 map); 218 } else if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) { 219 // Only if we can find country code without city code, it means the phone 220 // number include city code. 221 field_number_type = PHONE_HOME_CITY_AND_NUMBER; 222 } 223 // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form 224 // we fill only the prefix depending on the size of the input field. 225 ok = ok && AddClassification(parsed_phone_fields_[FIELD_PHONE], 226 field_number_type, 227 map); 228 // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form 229 // we fill only the suffix depending on the size of the input field. 230 if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) { 231 ok = ok && AddClassification(parsed_phone_fields_[FIELD_SUFFIX], 232 PHONE_HOME_NUMBER, 233 map); 234 } 235 } else { 236 ok = AddClassification(parsed_phone_fields_[FIELD_PHONE], 237 PHONE_HOME_WHOLE_NUMBER, 238 map); 239 } 240 241 return ok; 242} 243 244PhoneField::PhoneField() { 245 memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_)); 246} 247 248// static 249base::string16 PhoneField::GetRegExp(RegexType regex_id) { 250 switch (regex_id) { 251 case REGEX_COUNTRY: 252 return base::UTF8ToUTF16(autofill::kCountryCodeRe); 253 case REGEX_AREA: 254 return GetAreaRegex(); 255 case REGEX_AREA_NOTEXT: 256 return base::UTF8ToUTF16(autofill::kAreaCodeNotextRe); 257 case REGEX_PHONE: 258 return base::UTF8ToUTF16(autofill::kPhoneRe); 259 case REGEX_PREFIX_SEPARATOR: 260 return base::UTF8ToUTF16(autofill::kPhonePrefixSeparatorRe); 261 case REGEX_PREFIX: 262 return base::UTF8ToUTF16(autofill::kPhonePrefixRe); 263 case REGEX_SUFFIX_SEPARATOR: 264 return base::UTF8ToUTF16(autofill::kPhoneSuffixSeparatorRe); 265 case REGEX_SUFFIX: 266 return base::UTF8ToUTF16(autofill::kPhoneSuffixRe); 267 case REGEX_EXTENSION: 268 return base::UTF8ToUTF16(autofill::kPhoneExtensionRe); 269 default: 270 NOTREACHED(); 271 break; 272 } 273 return base::string16(); 274} 275 276} // namespace autofill 277