form_field.cc revision 7375b0cf763978c2988cd0784818fbedd11ce85a
1// Copyright (c) 2009 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autofill/form_field.h" 6 7#include "app/l10n_util.h" 8#include "base/string_util.h" 9#include "base/utf_string_conversions.h" 10#include "chrome/browser/autofill/address_field.h" 11#include "chrome/browser/autofill/autofill_field.h" 12#include "chrome/browser/autofill/credit_card_field.h" 13#include "chrome/browser/autofill/fax_field.h" 14#include "chrome/browser/autofill/name_field.h" 15#include "chrome/browser/autofill/phone_field.h" 16#include "third_party/WebKit/WebKit/chromium/public/WebRegularExpression.h" 17#include "third_party/WebKit/WebKit/chromium/public/WebString.h" 18#include "grit/autofill_resources.h" 19 20// Field names from the ECML specification; see RFC 3106. We've 21// made these names lowercase since we convert labels and field names to 22// lowercase before searching. 23 24// shipping name/address fields 25const char kEcmlShipToTitle[] = "ecom_shipto_postal_name_prefix"; 26const char kEcmlShipToFirstName[] = "ecom_shipto_postal_name_first"; 27const char kEcmlShipToMiddleName[] = "ecom_shipto_postal_name_middle"; 28const char kEcmlShipToLastName[] = "ecom_shipto_postal_name_last"; 29const char kEcmlShipToNameSuffix[] = "ecom_shipto_postal_name_suffix"; 30const char kEcmlShipToCompanyName[] = "ecom_shipto_postal_company"; 31const char kEcmlShipToAddress1[] = "ecom_shipto_postal_street_line1"; 32const char kEcmlShipToAddress2[] = "ecom_shipto_postal_street_line2"; 33const char kEcmlShipToAddress3[] = "ecom_shipto_postal_street_line3"; 34const char kEcmlShipToCity[] = "ecom_shipto_postal_city"; 35const char kEcmlShipToStateProv[] = "ecom_shipto_postal_stateprov"; 36const char kEcmlShipToPostalCode[] = "ecom_shipto_postal_postalcode"; 37const char kEcmlShipToCountry[] = "ecom_shipto_postal_countrycode"; 38const char kEcmlShipToPhone[] = "ecom_shipto_telecom_phone_number"; 39const char kEcmlShipToEmail[] = "ecom_shipto_online_email"; 40 41// billing name/address fields 42const char kEcmlBillToTitle[] = "ecom_billto_postal_name_prefix"; 43const char kEcmlBillToFirstName[] = "ecom_billto_postal_name_first"; 44const char kEcmlBillToMiddleName[] = "ecom_billto_postal_name_middle"; 45const char kEcmlBillToLastName[] = "ecom_billto_postal_name_last"; 46const char kEcmlBillToNameSuffix[] = "ecom_billto_postal_name_suffix"; 47const char kEcmlBillToCompanyName[] = "ecom_billto_postal_company"; 48const char kEcmlBillToAddress1[] = "ecom_billto_postal_street_line1"; 49const char kEcmlBillToAddress2[] = "ecom_billto_postal_street_line2"; 50const char kEcmlBillToAddress3[] = "ecom_billto_postal_street_line3"; 51const char kEcmlBillToCity[] = "ecom_billto_postal_city"; 52const char kEcmlBillToStateProv[] = "ecom_billto_postal_stateprov"; 53const char kEcmlBillToPostalCode[] = "ecom_billto_postal_postalcode"; 54const char kEcmlBillToCountry[] = "ecom_billto_postal_countrycode"; 55const char kEcmlBillToPhone[] = "ecom_billto_telecom_phone_number"; 56const char kEcmlBillToEmail[] = "ecom_billto_online_email"; 57 58// credit card fields 59const char kEcmlCardHolder[] = "ecom_payment_card_name"; 60const char kEcmlCardType[] = "ecom_payment_card_type"; 61const char kEcmlCardNumber[] = "ecom_payment_card_number"; 62const char kEcmlCardVerification[] = "ecom_payment_card_verification"; 63const char kEcmlCardExpireDay[] = "ecom_payment_card_expdate_day"; 64const char kEcmlCardExpireMonth[] = "ecom_payment_card_expdate_month"; 65const char kEcmlCardExpireYear[] = "ecom_payment_card_expdate_year"; 66 67namespace { 68 69// The name of the hidden form control element. 70const char* const kControlTypeHidden = "hidden"; 71 72// The name of the radio form control element. 73const char* const kControlTypeRadio = "radio"; 74 75// The name of the checkbox form control element. 76const char* const kControlTypeCheckBox = "checkbox"; 77 78} // namespace 79 80class EmailField : public FormField { 81 public: 82 virtual bool GetFieldInfo(FieldTypeMap* field_type_map) const { 83 bool ok = Add(field_type_map, field_, AutoFillType(EMAIL_ADDRESS)); 84 DCHECK(ok); 85 return true; 86 } 87 88 static EmailField* Parse(std::vector<AutoFillField*>::const_iterator* iter, 89 bool is_ecml) { 90 string16 pattern; 91 if (is_ecml) { 92 pattern = GetEcmlPattern(kEcmlShipToEmail, kEcmlBillToEmail, '|'); 93 } else { 94 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_EMAIL_RE); 95 } 96 97 AutoFillField* field; 98 if (ParseText(iter, pattern, &field)) 99 return new EmailField(field); 100 101 return NULL; 102 } 103 104 private: 105 explicit EmailField(AutoFillField *field) : field_(field) {} 106 107 AutoFillField* field_; 108}; 109 110FormFieldType FormField::GetFormFieldType() const { 111 return kOtherFieldType; 112} 113 114// static 115bool FormField::Match(AutoFillField* field, 116 const string16& pattern, 117 bool match_label_only) { 118 if (match_label_only) { 119 if (MatchLabel(field, pattern)) { 120 return true; 121 } 122 } else { 123 // For now, we apply the same pattern to the field's label and the field's 124 // name. Matching the name is a bit of a long shot for many patterns, but 125 // it generally doesn't hurt to try. 126 if (MatchLabel(field, pattern) || MatchName(field, pattern)) { 127 return true; 128 } 129 } 130 131 return false; 132} 133 134// static 135bool FormField::MatchName(AutoFillField* field, const string16& pattern) { 136 // TODO(jhawkins): Remove StringToLowerASCII. WebRegularExpression needs to 137 // be fixed to take WebTextCaseInsensitive into account. 138 WebKit::WebRegularExpression re(WebKit::WebString(pattern), 139 WebKit::WebTextCaseInsensitive); 140 bool match = re.match( 141 WebKit::WebString(StringToLowerASCII(field->name()))) != -1; 142 return match; 143} 144 145// static 146bool FormField::MatchLabel(AutoFillField* field, const string16& pattern) { 147 // TODO(jhawkins): Remove StringToLowerASCII. WebRegularExpression needs to 148 // be fixed to take WebTextCaseInsensitive into account. 149 WebKit::WebRegularExpression re(WebKit::WebString(pattern), 150 WebKit::WebTextCaseInsensitive); 151 bool match = re.match( 152 WebKit::WebString(StringToLowerASCII(field->label()))) != -1; 153 return match; 154} 155 156// static 157FormField* FormField::ParseFormField( 158 std::vector<AutoFillField*>::const_iterator* iter, 159 bool is_ecml) { 160 FormField *field; 161 field = EmailField::Parse(iter, is_ecml); 162 if (field != NULL) 163 return field; 164 field = PhoneField::Parse(iter, is_ecml); 165 if (field != NULL) 166 return field; 167 field = FaxField::Parse(iter); 168 if (field != NULL) 169 return field; 170 field = AddressField::Parse(iter, is_ecml); 171 if (field != NULL) 172 return field; 173 field = CreditCardField::Parse(iter, is_ecml); 174 if (field != NULL) 175 return field; 176 177 // We search for a NameField last since it matches the word "name", which is 178 // relatively general. 179 return NameField::Parse(iter, is_ecml); 180} 181 182// static 183bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, 184 const string16& pattern) { 185 AutoFillField* field; 186 return ParseText(iter, pattern, &field); 187} 188 189// static 190bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, 191 const string16& pattern, 192 AutoFillField** dest) { 193 return ParseText(iter, pattern, dest, false); 194} 195 196// static 197bool FormField::ParseEmptyText( 198 std::vector<AutoFillField*>::const_iterator* iter, 199 AutoFillField** dest) { 200 return ParseLabelText(iter, ASCIIToUTF16("^$"), dest); 201} 202 203// static 204bool FormField::ParseLabelText( 205 std::vector<AutoFillField*>::const_iterator* iter, 206 const string16& pattern, 207 AutoFillField** dest) { 208 return ParseText(iter, pattern, dest, true); 209} 210 211// static 212bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, 213 const string16& pattern, 214 AutoFillField** dest, 215 bool match_label_only) { 216 // Some forms have one or more hidden fields before each visible input; skip 217 // past these. 218 while (**iter && LowerCaseEqualsASCII((**iter)->form_control_type(), 219 kControlTypeHidden)) 220 (*iter)++; 221 222 AutoFillField* field = **iter; 223 if (!field) 224 return false; 225 226 if (Match(field, pattern, match_label_only)) { 227 if (dest) 228 *dest = field; 229 (*iter)++; 230 return true; 231 } 232 233 return false; 234} 235 236// static 237bool FormField::ParseLabelAndName( 238 std::vector<AutoFillField*>::const_iterator* iter, 239 const string16& pattern, 240 AutoFillField** dest) { 241 AutoFillField* field = **iter; 242 if (!field) 243 return false; 244 245 if (MatchLabel(field, pattern) && MatchName(field, pattern)) { 246 if (dest) 247 *dest = field; 248 (*iter)++; 249 return true; 250 } 251 252 return false; 253} 254 255// static 256bool FormField::ParseEmpty(std::vector<AutoFillField*>::const_iterator* iter) { 257 // TODO(jhawkins): Handle select fields. 258 return ParseLabelAndName(iter, ASCIIToUTF16("^$"), NULL); 259} 260 261// static 262bool FormField::Add(FieldTypeMap* field_type_map, AutoFillField* field, 263 const AutoFillType& type) { 264 // Several fields are optional. 265 if (field) 266 field_type_map->insert(make_pair(field->unique_name(), type.field_type())); 267 268 return true; 269} 270 271string16 FormField::GetEcmlPattern(const char* ecml_name) { 272 return ASCIIToUTF16(std::string("^") + ecml_name); 273} 274 275string16 FormField::GetEcmlPattern(const char* ecml_name1, 276 const char* ecml_name2, 277 char pattern_operator) { 278 return ASCIIToUTF16(StringPrintf("^%s%c^%s", 279 ecml_name1, pattern_operator, ecml_name2)); 280} 281 282FormFieldSet::FormFieldSet(FormStructure* fields) { 283 std::vector<AddressField*> addresses; 284 285 // First, find if there is one form field with an ECML name. If there is, 286 // then we will match an element only if it is in the standard. 287 bool is_ecml = CheckECML(fields); 288 289 // Parse fields. 290 std::vector<AutoFillField*>::const_iterator field = fields->begin(); 291 while (field != fields->end() && *field != NULL) { 292 // Don't parse hidden fields or radio or checkbox controls. 293 if (LowerCaseEqualsASCII((*field)->form_control_type(), 294 kControlTypeHidden) || 295 LowerCaseEqualsASCII((*field)->form_control_type(), 296 kControlTypeRadio) || 297 LowerCaseEqualsASCII((*field)->form_control_type(), 298 kControlTypeCheckBox)) { 299 field++; 300 continue; 301 } 302 303 FormField* form_field = FormField::ParseFormField(&field, is_ecml); 304 if (!form_field) { 305 field++; 306 continue; 307 } 308 309 push_back(form_field); 310 311 if (form_field->GetFormFieldType() == kAddressType) { 312 AddressField* address = static_cast<AddressField*>(form_field); 313 if (address->IsFullAddress()) 314 addresses.push_back(address); 315 } 316 } 317 318 // Now determine an address type for each address. Note, if this is an ECML 319 // form, then we already got this info from the field names. 320 if (!is_ecml && !addresses.empty()) { 321 if (addresses.size() == 1) { 322 addresses[0]->SetType(addresses[0]->FindType()); 323 } else { 324 AddressType type0 = addresses[0]->FindType(); 325 AddressType type1 = addresses[1]->FindType(); 326 327 // When there are two addresses on a page, they almost always appear in 328 // the order (billing, shipping). 329 bool reversed = (type0 == kShippingAddress && type1 == kBillingAddress); 330 addresses[0]->SetType(reversed ? kShippingAddress : kBillingAddress); 331 addresses[1]->SetType(reversed ? kBillingAddress : kShippingAddress); 332 } 333 } 334} 335 336bool FormFieldSet::CheckECML(FormStructure* fields) { 337 size_t num_fields = fields->field_count(); 338 struct EcmlField { 339 const char* name_; 340 const int length_; 341 } form_fields[] = { 342#define ECML_STRING_ENTRY(x) { x, arraysize(x) - 1 }, 343 ECML_STRING_ENTRY(kEcmlShipToTitle) 344 ECML_STRING_ENTRY(kEcmlShipToFirstName) 345 ECML_STRING_ENTRY(kEcmlShipToMiddleName) 346 ECML_STRING_ENTRY(kEcmlShipToLastName) 347 ECML_STRING_ENTRY(kEcmlShipToNameSuffix) 348 ECML_STRING_ENTRY(kEcmlShipToCompanyName) 349 ECML_STRING_ENTRY(kEcmlShipToAddress1) 350 ECML_STRING_ENTRY(kEcmlShipToAddress2) 351 ECML_STRING_ENTRY(kEcmlShipToAddress3) 352 ECML_STRING_ENTRY(kEcmlShipToCity) 353 ECML_STRING_ENTRY(kEcmlShipToStateProv) 354 ECML_STRING_ENTRY(kEcmlShipToPostalCode) 355 ECML_STRING_ENTRY(kEcmlShipToCountry) 356 ECML_STRING_ENTRY(kEcmlShipToPhone) 357 ECML_STRING_ENTRY(kEcmlShipToPhone) 358 ECML_STRING_ENTRY(kEcmlShipToEmail) 359 ECML_STRING_ENTRY(kEcmlBillToTitle) 360 ECML_STRING_ENTRY(kEcmlBillToFirstName) 361 ECML_STRING_ENTRY(kEcmlBillToMiddleName) 362 ECML_STRING_ENTRY(kEcmlBillToLastName) 363 ECML_STRING_ENTRY(kEcmlBillToNameSuffix) 364 ECML_STRING_ENTRY(kEcmlBillToCompanyName) 365 ECML_STRING_ENTRY(kEcmlBillToAddress1) 366 ECML_STRING_ENTRY(kEcmlBillToAddress2) 367 ECML_STRING_ENTRY(kEcmlBillToAddress3) 368 ECML_STRING_ENTRY(kEcmlBillToCity) 369 ECML_STRING_ENTRY(kEcmlBillToStateProv) 370 ECML_STRING_ENTRY(kEcmlBillToPostalCode) 371 ECML_STRING_ENTRY(kEcmlBillToCountry) 372 ECML_STRING_ENTRY(kEcmlBillToPhone) 373 ECML_STRING_ENTRY(kEcmlBillToPhone) 374 ECML_STRING_ENTRY(kEcmlBillToEmail) 375 ECML_STRING_ENTRY(kEcmlCardHolder) 376 ECML_STRING_ENTRY(kEcmlCardType) 377 ECML_STRING_ENTRY(kEcmlCardNumber) 378 ECML_STRING_ENTRY(kEcmlCardVerification) 379 ECML_STRING_ENTRY(kEcmlCardExpireMonth) 380 ECML_STRING_ENTRY(kEcmlCardExpireYear) 381#undef ECML_STRING_ENTRY 382 }; 383 384 const string16 ecom(ASCIIToUTF16("ecom")); 385 for (size_t index = 0; index < num_fields; ++index) { 386 const string16& utf16_name = fields->field(index)->name(); 387 if (StartsWith(utf16_name, ecom, true)) { 388 std::string name(UTF16ToASCII(utf16_name)); 389 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(form_fields); ++i) { 390 if (base::strncasecmp(name.c_str(), form_fields[i].name_, 391 form_fields[i].length_) == 0) { 392 return true; 393 } 394 } 395 } 396 } 397 398 return false; 399} 400