form_field.cc revision 513209b27ff55e2841eac0e4120199c23acce758
1// Copyright (c) 2009 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autofill/form_field.h" 6 7#include "base/string_util.h" 8#include "base/utf_string_conversions.h" 9#include "chrome/browser/autofill/address_field.h" 10#include "chrome/browser/autofill/autofill_field.h" 11#include "chrome/browser/autofill/credit_card_field.h" 12#include "chrome/browser/autofill/fax_field.h" 13#include "chrome/browser/autofill/name_field.h" 14#include "chrome/browser/autofill/phone_field.h" 15#include "third_party/WebKit/WebKit/chromium/public/WebRegularExpression.h" 16#include "third_party/WebKit/WebKit/chromium/public/WebString.h" 17 18// Field names from the ECML specification; see RFC 3106. We've 19// made these names lowercase since we convert labels and field names to 20// lowercase before searching. 21 22// shipping name/address fields 23const char kEcmlShipToTitle[] = "ecom_shipto_postal_name_prefix"; 24const char kEcmlShipToFirstName[] = "ecom_shipto_postal_name_first"; 25const char kEcmlShipToMiddleName[] = "ecom_shipto_postal_name_middle"; 26const char kEcmlShipToLastName[] = "ecom_shipto_postal_name_last"; 27const char kEcmlShipToNameSuffix[] = "ecom_shipto_postal_name_suffix"; 28const char kEcmlShipToCompanyName[] = "ecom_shipto_postal_company"; 29const char kEcmlShipToAddress1[] = "ecom_shipto_postal_street_line1"; 30const char kEcmlShipToAddress2[] = "ecom_shipto_postal_street_line2"; 31const char kEcmlShipToAddress3[] = "ecom_shipto_postal_street_line3"; 32const char kEcmlShipToCity[] = "ecom_shipto_postal_city"; 33const char kEcmlShipToStateProv[] = "ecom_shipto_postal_stateprov"; 34const char kEcmlShipToPostalCode[] = "ecom_shipto_postal_postalcode"; 35const char kEcmlShipToCountry[] = "ecom_shipto_postal_countrycode"; 36const char kEcmlShipToPhone[] = "ecom_shipto_telecom_phone_number"; 37const char kEcmlShipToEmail[] = "ecom_shipto_online_email"; 38 39// billing name/address fields 40const char kEcmlBillToTitle[] = "ecom_billto_postal_name_prefix"; 41const char kEcmlBillToFirstName[] = "ecom_billto_postal_name_first"; 42const char kEcmlBillToMiddleName[] = "ecom_billto_postal_name_middle"; 43const char kEcmlBillToLastName[] = "ecom_billto_postal_name_last"; 44const char kEcmlBillToNameSuffix[] = "ecom_billto_postal_name_suffix"; 45const char kEcmlBillToCompanyName[] = "ecom_billto_postal_company"; 46const char kEcmlBillToAddress1[] = "ecom_billto_postal_street_line1"; 47const char kEcmlBillToAddress2[] = "ecom_billto_postal_street_line2"; 48const char kEcmlBillToAddress3[] = "ecom_billto_postal_street_line3"; 49const char kEcmlBillToCity[] = "ecom_billto_postal_city"; 50const char kEcmlBillToStateProv[] = "ecom_billto_postal_stateprov"; 51const char kEcmlBillToPostalCode[] = "ecom_billto_postal_postalcode"; 52const char kEcmlBillToCountry[] = "ecom_billto_postal_countrycode"; 53const char kEcmlBillToPhone[] = "ecom_billto_telecom_phone_number"; 54const char kEcmlBillToEmail[] = "ecom_billto_online_email"; 55 56// credit card fields 57const char kEcmlCardHolder[] = "ecom_payment_card_name"; 58const char kEcmlCardType[] = "ecom_payment_card_type"; 59const char kEcmlCardNumber[] = "ecom_payment_card_number"; 60const char kEcmlCardVerification[] = "ecom_payment_card_verification"; 61const char kEcmlCardExpireDay[] = "ecom_payment_card_expdate_day"; 62const char kEcmlCardExpireMonth[] = "ecom_payment_card_expdate_month"; 63const char kEcmlCardExpireYear[] = "ecom_payment_card_expdate_year"; 64 65namespace { 66 67// The name of the hidden form control element. 68const char* const kControlTypeHidden = "hidden"; 69 70// The name of the radio form control element. 71const char* const kControlTypeRadio = "radio"; 72 73// The name of the checkbox form control element. 74const char* const kControlTypeCheckBox = "checkbox"; 75 76} // namespace 77 78class EmailField : public FormField { 79 public: 80 virtual bool GetFieldInfo(FieldTypeMap* field_type_map) const { 81 bool ok = Add(field_type_map, field_, AutoFillType(EMAIL_ADDRESS)); 82 DCHECK(ok); 83 return true; 84 } 85 86 static EmailField* Parse(std::vector<AutoFillField*>::const_iterator* iter, 87 bool is_ecml) { 88 string16 pattern; 89 if (is_ecml) { 90 pattern = GetEcmlPattern(kEcmlShipToEmail, kEcmlBillToEmail, '|'); 91 } else { 92 pattern = ASCIIToUTF16("email|e-mail"); 93 } 94 95 AutoFillField* field; 96 if (ParseText(iter, pattern, &field)) 97 return new EmailField(field); 98 99 return NULL; 100 } 101 102 private: 103 explicit EmailField(AutoFillField *field) : field_(field) {} 104 105 AutoFillField* field_; 106}; 107 108// static 109bool FormField::Match(AutoFillField* field, 110 const string16& pattern, 111 bool match_label_only) { 112 if (match_label_only) { 113 if (MatchLabel(field, pattern)) { 114 return true; 115 } 116 } else { 117 // For now, we apply the same pattern to the field's label and the field's 118 // name. Matching the name is a bit of a long shot for many patterns, but 119 // it generally doesn't hurt to try. 120 if (MatchLabel(field, pattern) || MatchName(field, pattern)) { 121 return true; 122 } 123 } 124 125 return false; 126} 127 128// static 129bool FormField::MatchName(AutoFillField* field, const string16& pattern) { 130 // TODO(jhawkins): Remove StringToLowerASCII. WebRegularExpression needs to 131 // be fixed to take WebTextCaseInsensitive into account. 132 WebKit::WebRegularExpression re(WebKit::WebString(pattern), 133 WebKit::WebTextCaseInsensitive); 134 bool match = re.match( 135 WebKit::WebString(StringToLowerASCII(field->name()))) != -1; 136 return match; 137} 138 139// static 140bool FormField::MatchLabel(AutoFillField* field, const string16& pattern) { 141 // TODO(jhawkins): Remove StringToLowerASCII. WebRegularExpression needs to 142 // be fixed to take WebTextCaseInsensitive into account. 143 WebKit::WebRegularExpression re(WebKit::WebString(pattern), 144 WebKit::WebTextCaseInsensitive); 145 bool match = re.match( 146 WebKit::WebString(StringToLowerASCII(field->label()))) != -1; 147 return match; 148} 149 150// static 151FormField* FormField::ParseFormField( 152 std::vector<AutoFillField*>::const_iterator* iter, 153 bool is_ecml) { 154 FormField *field; 155 field = EmailField::Parse(iter, is_ecml); 156 if (field != NULL) 157 return field; 158 field = PhoneField::Parse(iter, is_ecml); 159 if (field != NULL) 160 return field; 161 field = FaxField::Parse(iter); 162 if (field != NULL) 163 return field; 164 field = AddressField::Parse(iter, is_ecml); 165 if (field != NULL) 166 return field; 167 field = CreditCardField::Parse(iter, is_ecml); 168 if (field != NULL) 169 return field; 170 171 // We search for a NameField last since it matches the word "name", which is 172 // relatively general. 173 return NameField::Parse(iter, is_ecml); 174} 175 176// static 177bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, 178 const string16& pattern) { 179 AutoFillField* field; 180 return ParseText(iter, pattern, &field); 181} 182 183// static 184bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, 185 const string16& pattern, 186 AutoFillField** dest) { 187 return ParseText(iter, pattern, dest, false); 188} 189 190// static 191bool FormField::ParseEmptyText( 192 std::vector<AutoFillField*>::const_iterator* iter, 193 AutoFillField** dest) { 194 return ParseLabelText(iter, ASCIIToUTF16("^$"), dest); 195} 196 197// static 198bool FormField::ParseLabelText( 199 std::vector<AutoFillField*>::const_iterator* iter, 200 const string16& pattern, 201 AutoFillField** dest) { 202 return ParseText(iter, pattern, dest, true); 203} 204 205// static 206bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter, 207 const string16& pattern, 208 AutoFillField** dest, 209 bool match_label_only) { 210 // Some forms have one or more hidden fields before each visible input; skip 211 // past these. 212 while (**iter && LowerCaseEqualsASCII((**iter)->form_control_type(), 213 kControlTypeHidden)) 214 (*iter)++; 215 216 AutoFillField* field = **iter; 217 if (!field) 218 return false; 219 220 if (Match(field, pattern, match_label_only)) { 221 if (dest) 222 *dest = field; 223 (*iter)++; 224 return true; 225 } 226 227 return false; 228} 229 230// static 231bool FormField::ParseLabelAndName( 232 std::vector<AutoFillField*>::const_iterator* iter, 233 const string16& pattern, 234 AutoFillField** dest) { 235 AutoFillField* field = **iter; 236 if (!field) 237 return false; 238 239 if (MatchLabel(field, pattern) && MatchName(field, pattern)) { 240 if (dest) 241 *dest = field; 242 (*iter)++; 243 return true; 244 } 245 246 return false; 247} 248 249// static 250bool FormField::ParseEmpty(std::vector<AutoFillField*>::const_iterator* iter) { 251 // TODO(jhawkins): Handle select fields. 252 return ParseLabelAndName(iter, ASCIIToUTF16("^$"), NULL); 253} 254 255// static 256bool FormField::Add(FieldTypeMap* field_type_map, AutoFillField* field, 257 const AutoFillType& type) { 258 // Several fields are optional. 259 if (field) 260 field_type_map->insert(make_pair(field->unique_name(), type.field_type())); 261 262 return true; 263} 264 265string16 FormField::GetEcmlPattern(const char* ecml_name) { 266 return ASCIIToUTF16(std::string("^") + ecml_name); 267} 268 269string16 FormField::GetEcmlPattern(const char* ecml_name1, 270 const char* ecml_name2, 271 char pattern_operator) { 272 return ASCIIToUTF16(StringPrintf("^%s%c^%s", 273 ecml_name1, pattern_operator, ecml_name2)); 274} 275 276FormFieldSet::FormFieldSet(FormStructure* fields) { 277 std::vector<AddressField*> addresses; 278 279 // First, find if there is one form field with an ECML name. If there is, 280 // then we will match an element only if it is in the standard. 281 bool is_ecml = CheckECML(fields); 282 283 // Parse fields. 284 std::vector<AutoFillField*>::const_iterator field = fields->begin(); 285 while (field != fields->end() && *field != NULL) { 286 // Don't parse hidden fields or radio or checkbox controls. 287 if (LowerCaseEqualsASCII((*field)->form_control_type(), 288 kControlTypeHidden) || 289 LowerCaseEqualsASCII((*field)->form_control_type(), 290 kControlTypeRadio) || 291 LowerCaseEqualsASCII((*field)->form_control_type(), 292 kControlTypeCheckBox)) { 293 field++; 294 continue; 295 } 296 297 FormField* form_field = FormField::ParseFormField(&field, is_ecml); 298 if (!form_field) { 299 field++; 300 continue; 301 } 302 303 push_back(form_field); 304 305 if (form_field->GetFormFieldType() == kAddressType) { 306 AddressField* address = static_cast<AddressField*>(form_field); 307 if (address->IsFullAddress()) 308 addresses.push_back(address); 309 } 310 } 311 312 // Now determine an address type for each address. Note, if this is an ECML 313 // form, then we already got this info from the field names. 314 if (!is_ecml && !addresses.empty()) { 315 if (addresses.size() == 1) { 316 addresses[0]->SetType(addresses[0]->FindType()); 317 } else { 318 AddressType type0 = addresses[0]->FindType(); 319 AddressType type1 = addresses[1]->FindType(); 320 321 // When there are two addresses on a page, they almost always appear in 322 // the order (billing, shipping). 323 bool reversed = (type0 == kShippingAddress && type1 == kBillingAddress); 324 addresses[0]->SetType(reversed ? kShippingAddress : kBillingAddress); 325 addresses[1]->SetType(reversed ? kBillingAddress : kShippingAddress); 326 } 327 } 328} 329 330bool FormFieldSet::CheckECML(FormStructure* fields) { 331 size_t num_fields = fields->field_count(); 332 struct EcmlField { 333 const char* name_; 334 const int length_; 335 } form_fields[] = { 336#define ECML_STRING_ENTRY(x) { x, arraysize(x) - 1 }, 337 ECML_STRING_ENTRY(kEcmlShipToTitle) 338 ECML_STRING_ENTRY(kEcmlShipToFirstName) 339 ECML_STRING_ENTRY(kEcmlShipToMiddleName) 340 ECML_STRING_ENTRY(kEcmlShipToLastName) 341 ECML_STRING_ENTRY(kEcmlShipToNameSuffix) 342 ECML_STRING_ENTRY(kEcmlShipToCompanyName) 343 ECML_STRING_ENTRY(kEcmlShipToAddress1) 344 ECML_STRING_ENTRY(kEcmlShipToAddress2) 345 ECML_STRING_ENTRY(kEcmlShipToAddress3) 346 ECML_STRING_ENTRY(kEcmlShipToCity) 347 ECML_STRING_ENTRY(kEcmlShipToStateProv) 348 ECML_STRING_ENTRY(kEcmlShipToPostalCode) 349 ECML_STRING_ENTRY(kEcmlShipToCountry) 350 ECML_STRING_ENTRY(kEcmlShipToPhone) 351 ECML_STRING_ENTRY(kEcmlShipToPhone) 352 ECML_STRING_ENTRY(kEcmlShipToEmail) 353 ECML_STRING_ENTRY(kEcmlBillToTitle) 354 ECML_STRING_ENTRY(kEcmlBillToFirstName) 355 ECML_STRING_ENTRY(kEcmlBillToMiddleName) 356 ECML_STRING_ENTRY(kEcmlBillToLastName) 357 ECML_STRING_ENTRY(kEcmlBillToNameSuffix) 358 ECML_STRING_ENTRY(kEcmlBillToCompanyName) 359 ECML_STRING_ENTRY(kEcmlBillToAddress1) 360 ECML_STRING_ENTRY(kEcmlBillToAddress2) 361 ECML_STRING_ENTRY(kEcmlBillToAddress3) 362 ECML_STRING_ENTRY(kEcmlBillToCity) 363 ECML_STRING_ENTRY(kEcmlBillToStateProv) 364 ECML_STRING_ENTRY(kEcmlBillToPostalCode) 365 ECML_STRING_ENTRY(kEcmlBillToCountry) 366 ECML_STRING_ENTRY(kEcmlBillToPhone) 367 ECML_STRING_ENTRY(kEcmlBillToPhone) 368 ECML_STRING_ENTRY(kEcmlBillToEmail) 369 ECML_STRING_ENTRY(kEcmlCardHolder) 370 ECML_STRING_ENTRY(kEcmlCardType) 371 ECML_STRING_ENTRY(kEcmlCardNumber) 372 ECML_STRING_ENTRY(kEcmlCardVerification) 373 ECML_STRING_ENTRY(kEcmlCardExpireMonth) 374 ECML_STRING_ENTRY(kEcmlCardExpireYear) 375#undef ECML_STRING_ENTRY 376 }; 377 378 const string16 ecom(ASCIIToUTF16("ecom")); 379 for (size_t index = 0; index < num_fields; ++index) { 380 const string16& utf16_name = fields->field(index)->name(); 381 if (StartsWith(utf16_name, ecom, true)) { 382 std::string name(UTF16ToASCII(utf16_name)); 383 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(form_fields); ++i) { 384 if (base::strncasecmp(name.c_str(), form_fields[i].name_, 385 form_fields[i].length_) == 0) { 386 return true; 387 } 388 } 389 } 390 } 391 392 return false; 393} 394