form_field.cc revision 513209b27ff55e2841eac0e4120199c23acce758
1// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autofill/form_field.h"
6
7#include "base/string_util.h"
8#include "base/utf_string_conversions.h"
9#include "chrome/browser/autofill/address_field.h"
10#include "chrome/browser/autofill/autofill_field.h"
11#include "chrome/browser/autofill/credit_card_field.h"
12#include "chrome/browser/autofill/fax_field.h"
13#include "chrome/browser/autofill/name_field.h"
14#include "chrome/browser/autofill/phone_field.h"
15#include "third_party/WebKit/WebKit/chromium/public/WebRegularExpression.h"
16#include "third_party/WebKit/WebKit/chromium/public/WebString.h"
17
// ECML (Electronic Commerce Modeling Language) field names; see RFC 3106.
// Every name is stored in lowercase because labels and field names are
// converted to lowercase before they are searched.

// Shipping ("ship to") name and address fields.
const char kEcmlShipToTitle[] = "ecom_shipto_postal_name_prefix";
const char kEcmlShipToFirstName[] = "ecom_shipto_postal_name_first";
const char kEcmlShipToMiddleName[] = "ecom_shipto_postal_name_middle";
const char kEcmlShipToLastName[] = "ecom_shipto_postal_name_last";
const char kEcmlShipToNameSuffix[] = "ecom_shipto_postal_name_suffix";
const char kEcmlShipToCompanyName[] = "ecom_shipto_postal_company";
const char kEcmlShipToAddress1[] = "ecom_shipto_postal_street_line1";
const char kEcmlShipToAddress2[] = "ecom_shipto_postal_street_line2";
const char kEcmlShipToAddress3[] = "ecom_shipto_postal_street_line3";
const char kEcmlShipToCity[] = "ecom_shipto_postal_city";
const char kEcmlShipToStateProv[] = "ecom_shipto_postal_stateprov";
const char kEcmlShipToPostalCode[] = "ecom_shipto_postal_postalcode";
const char kEcmlShipToCountry[] = "ecom_shipto_postal_countrycode";
const char kEcmlShipToPhone[] = "ecom_shipto_telecom_phone_number";
const char kEcmlShipToEmail[] = "ecom_shipto_online_email";

// Billing ("bill to") name and address fields.  These mirror the shipping
// names above with "billto" in place of "shipto".
const char kEcmlBillToTitle[] = "ecom_billto_postal_name_prefix";
const char kEcmlBillToFirstName[] = "ecom_billto_postal_name_first";
const char kEcmlBillToMiddleName[] = "ecom_billto_postal_name_middle";
const char kEcmlBillToLastName[] = "ecom_billto_postal_name_last";
const char kEcmlBillToNameSuffix[] = "ecom_billto_postal_name_suffix";
const char kEcmlBillToCompanyName[] = "ecom_billto_postal_company";
const char kEcmlBillToAddress1[] = "ecom_billto_postal_street_line1";
const char kEcmlBillToAddress2[] = "ecom_billto_postal_street_line2";
const char kEcmlBillToAddress3[] = "ecom_billto_postal_street_line3";
const char kEcmlBillToCity[] = "ecom_billto_postal_city";
const char kEcmlBillToStateProv[] = "ecom_billto_postal_stateprov";
const char kEcmlBillToPostalCode[] = "ecom_billto_postal_postalcode";
const char kEcmlBillToCountry[] = "ecom_billto_postal_countrycode";
const char kEcmlBillToPhone[] = "ecom_billto_telecom_phone_number";
const char kEcmlBillToEmail[] = "ecom_billto_online_email";

// Payment card fields.
const char kEcmlCardHolder[] = "ecom_payment_card_name";
const char kEcmlCardType[] = "ecom_payment_card_type";
const char kEcmlCardNumber[] = "ecom_payment_card_number";
const char kEcmlCardVerification[] = "ecom_payment_card_verification";
const char kEcmlCardExpireDay[] = "ecom_payment_card_expdate_day";
const char kEcmlCardExpireMonth[] = "ecom_payment_card_expdate_month";
const char kEcmlCardExpireYear[] = "ecom_payment_card_expdate_year";
64
namespace {

// Form control type strings, compared case-insensitively against
// AutoFillField::form_control_type() elsewhere in this file.

// Type string of a hidden form control.
const char kControlTypeHidden[] = "hidden";

// Type string of a radio button form control.
const char kControlTypeRadio[] = "radio";

// Type string of a checkbox form control.
const char kControlTypeCheckBox[] = "checkbox";

}  // namespace
77
78class EmailField : public FormField {
79 public:
80  virtual bool GetFieldInfo(FieldTypeMap* field_type_map) const {
81    bool ok = Add(field_type_map, field_, AutoFillType(EMAIL_ADDRESS));
82    DCHECK(ok);
83    return true;
84  }
85
86  static EmailField* Parse(std::vector<AutoFillField*>::const_iterator* iter,
87                          bool is_ecml) {
88    string16 pattern;
89    if (is_ecml) {
90      pattern = GetEcmlPattern(kEcmlShipToEmail, kEcmlBillToEmail, '|');
91    } else {
92      pattern = ASCIIToUTF16("email|e-mail");
93    }
94
95    AutoFillField* field;
96    if (ParseText(iter, pattern, &field))
97      return new EmailField(field);
98
99    return NULL;
100  }
101
102 private:
103  explicit EmailField(AutoFillField *field) : field_(field) {}
104
105  AutoFillField* field_;
106};
107
108// static
109bool FormField::Match(AutoFillField* field,
110                      const string16& pattern,
111                      bool match_label_only) {
112  if (match_label_only) {
113    if (MatchLabel(field, pattern)) {
114      return true;
115    }
116  } else {
117    // For now, we apply the same pattern to the field's label and the field's
118    // name.  Matching the name is a bit of a long shot for many patterns, but
119    // it generally doesn't hurt to try.
120    if (MatchLabel(field, pattern) || MatchName(field, pattern)) {
121      return true;
122    }
123  }
124
125  return false;
126}
127
128// static
129bool FormField::MatchName(AutoFillField* field, const string16& pattern) {
130  // TODO(jhawkins): Remove StringToLowerASCII.  WebRegularExpression needs to
131  // be fixed to take WebTextCaseInsensitive into account.
132  WebKit::WebRegularExpression re(WebKit::WebString(pattern),
133                                  WebKit::WebTextCaseInsensitive);
134  bool match = re.match(
135      WebKit::WebString(StringToLowerASCII(field->name()))) != -1;
136  return match;
137}
138
139// static
140bool FormField::MatchLabel(AutoFillField* field, const string16& pattern) {
141  // TODO(jhawkins): Remove StringToLowerASCII.  WebRegularExpression needs to
142  // be fixed to take WebTextCaseInsensitive into account.
143  WebKit::WebRegularExpression re(WebKit::WebString(pattern),
144                                  WebKit::WebTextCaseInsensitive);
145  bool match = re.match(
146      WebKit::WebString(StringToLowerASCII(field->label()))) != -1;
147  return match;
148}
149
150// static
151FormField* FormField::ParseFormField(
152    std::vector<AutoFillField*>::const_iterator* iter,
153    bool is_ecml) {
154  FormField *field;
155  field = EmailField::Parse(iter, is_ecml);
156  if (field != NULL)
157    return field;
158  field = PhoneField::Parse(iter, is_ecml);
159  if (field != NULL)
160    return field;
161  field = FaxField::Parse(iter);
162  if (field != NULL)
163    return field;
164  field = AddressField::Parse(iter, is_ecml);
165  if (field != NULL)
166    return field;
167  field = CreditCardField::Parse(iter, is_ecml);
168  if (field != NULL)
169    return field;
170
171  // We search for a NameField last since it matches the word "name", which is
172  // relatively general.
173  return NameField::Parse(iter, is_ecml);
174}
175
176// static
177bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
178                          const string16& pattern) {
179  AutoFillField* field;
180  return ParseText(iter, pattern, &field);
181}
182
183// static
184bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
185                          const string16& pattern,
186                          AutoFillField** dest) {
187  return ParseText(iter, pattern, dest, false);
188}
189
190// static
191bool FormField::ParseEmptyText(
192    std::vector<AutoFillField*>::const_iterator* iter,
193    AutoFillField** dest) {
194  return ParseLabelText(iter, ASCIIToUTF16("^$"), dest);
195}
196
197// static
198bool FormField::ParseLabelText(
199    std::vector<AutoFillField*>::const_iterator* iter,
200    const string16& pattern,
201    AutoFillField** dest) {
202  return ParseText(iter, pattern, dest, true);
203}
204
205// static
206bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
207                          const string16& pattern,
208                          AutoFillField** dest,
209                          bool match_label_only) {
210  // Some forms have one or more hidden fields before each visible input; skip
211  // past these.
212  while (**iter && LowerCaseEqualsASCII((**iter)->form_control_type(),
213                                        kControlTypeHidden))
214    (*iter)++;
215
216  AutoFillField* field = **iter;
217  if (!field)
218    return false;
219
220  if (Match(field, pattern, match_label_only)) {
221    if (dest)
222      *dest = field;
223    (*iter)++;
224    return true;
225  }
226
227  return false;
228}
229
230// static
231bool FormField::ParseLabelAndName(
232    std::vector<AutoFillField*>::const_iterator* iter,
233    const string16& pattern,
234    AutoFillField** dest) {
235  AutoFillField* field = **iter;
236  if (!field)
237    return false;
238
239  if (MatchLabel(field, pattern) && MatchName(field, pattern)) {
240    if (dest)
241      *dest = field;
242    (*iter)++;
243    return true;
244  }
245
246  return false;
247}
248
249// static
250bool FormField::ParseEmpty(std::vector<AutoFillField*>::const_iterator* iter) {
251  // TODO(jhawkins): Handle select fields.
252  return ParseLabelAndName(iter, ASCIIToUTF16("^$"), NULL);
253}
254
255// static
256bool FormField::Add(FieldTypeMap* field_type_map, AutoFillField* field,
257               const AutoFillType& type) {
258  // Several fields are optional.
259  if (field)
260    field_type_map->insert(make_pair(field->unique_name(), type.field_type()));
261
262  return true;
263}
264
265string16 FormField::GetEcmlPattern(const char* ecml_name) {
266  return ASCIIToUTF16(std::string("^") + ecml_name);
267}
268
269string16 FormField::GetEcmlPattern(const char* ecml_name1,
270                                   const char* ecml_name2,
271                                   char pattern_operator) {
272  return ASCIIToUTF16(StringPrintf("^%s%c^%s",
273      ecml_name1, pattern_operator, ecml_name2));
274}
275
276FormFieldSet::FormFieldSet(FormStructure* fields) {
277  std::vector<AddressField*> addresses;
278
279  // First, find if there is one form field with an ECML name.  If there is,
280  // then we will match an element only if it is in the standard.
281  bool is_ecml = CheckECML(fields);
282
283  // Parse fields.
284  std::vector<AutoFillField*>::const_iterator field = fields->begin();
285  while (field != fields->end() && *field != NULL) {
286    // Don't parse hidden fields or radio or checkbox controls.
287    if (LowerCaseEqualsASCII((*field)->form_control_type(),
288                             kControlTypeHidden) ||
289        LowerCaseEqualsASCII((*field)->form_control_type(),
290                             kControlTypeRadio) ||
291        LowerCaseEqualsASCII((*field)->form_control_type(),
292                             kControlTypeCheckBox)) {
293      field++;
294      continue;
295    }
296
297    FormField* form_field = FormField::ParseFormField(&field, is_ecml);
298    if (!form_field) {
299      field++;
300      continue;
301    }
302
303    push_back(form_field);
304
305    if (form_field->GetFormFieldType() == kAddressType) {
306      AddressField* address = static_cast<AddressField*>(form_field);
307      if (address->IsFullAddress())
308        addresses.push_back(address);
309    }
310  }
311
312  // Now determine an address type for each address. Note, if this is an ECML
313  // form, then we already got this info from the field names.
314  if (!is_ecml && !addresses.empty()) {
315    if (addresses.size() == 1) {
316      addresses[0]->SetType(addresses[0]->FindType());
317    } else {
318      AddressType type0 = addresses[0]->FindType();
319      AddressType type1 = addresses[1]->FindType();
320
321      // When there are two addresses on a page, they almost always appear in
322      // the order (billing, shipping).
323      bool reversed = (type0 == kShippingAddress && type1 == kBillingAddress);
324      addresses[0]->SetType(reversed ? kShippingAddress : kBillingAddress);
325      addresses[1]->SetType(reversed ? kBillingAddress : kShippingAddress);
326    }
327  }
328}
329
330bool FormFieldSet::CheckECML(FormStructure* fields) {
331  size_t num_fields = fields->field_count();
332  struct EcmlField {
333    const char* name_;
334    const int length_;
335  } form_fields[] = {
336#define ECML_STRING_ENTRY(x) { x, arraysize(x) - 1 },
337    ECML_STRING_ENTRY(kEcmlShipToTitle)
338    ECML_STRING_ENTRY(kEcmlShipToFirstName)
339    ECML_STRING_ENTRY(kEcmlShipToMiddleName)
340    ECML_STRING_ENTRY(kEcmlShipToLastName)
341    ECML_STRING_ENTRY(kEcmlShipToNameSuffix)
342    ECML_STRING_ENTRY(kEcmlShipToCompanyName)
343    ECML_STRING_ENTRY(kEcmlShipToAddress1)
344    ECML_STRING_ENTRY(kEcmlShipToAddress2)
345    ECML_STRING_ENTRY(kEcmlShipToAddress3)
346    ECML_STRING_ENTRY(kEcmlShipToCity)
347    ECML_STRING_ENTRY(kEcmlShipToStateProv)
348    ECML_STRING_ENTRY(kEcmlShipToPostalCode)
349    ECML_STRING_ENTRY(kEcmlShipToCountry)
350    ECML_STRING_ENTRY(kEcmlShipToPhone)
351    ECML_STRING_ENTRY(kEcmlShipToPhone)
352    ECML_STRING_ENTRY(kEcmlShipToEmail)
353    ECML_STRING_ENTRY(kEcmlBillToTitle)
354    ECML_STRING_ENTRY(kEcmlBillToFirstName)
355    ECML_STRING_ENTRY(kEcmlBillToMiddleName)
356    ECML_STRING_ENTRY(kEcmlBillToLastName)
357    ECML_STRING_ENTRY(kEcmlBillToNameSuffix)
358    ECML_STRING_ENTRY(kEcmlBillToCompanyName)
359    ECML_STRING_ENTRY(kEcmlBillToAddress1)
360    ECML_STRING_ENTRY(kEcmlBillToAddress2)
361    ECML_STRING_ENTRY(kEcmlBillToAddress3)
362    ECML_STRING_ENTRY(kEcmlBillToCity)
363    ECML_STRING_ENTRY(kEcmlBillToStateProv)
364    ECML_STRING_ENTRY(kEcmlBillToPostalCode)
365    ECML_STRING_ENTRY(kEcmlBillToCountry)
366    ECML_STRING_ENTRY(kEcmlBillToPhone)
367    ECML_STRING_ENTRY(kEcmlBillToPhone)
368    ECML_STRING_ENTRY(kEcmlBillToEmail)
369    ECML_STRING_ENTRY(kEcmlCardHolder)
370    ECML_STRING_ENTRY(kEcmlCardType)
371    ECML_STRING_ENTRY(kEcmlCardNumber)
372    ECML_STRING_ENTRY(kEcmlCardVerification)
373    ECML_STRING_ENTRY(kEcmlCardExpireMonth)
374    ECML_STRING_ENTRY(kEcmlCardExpireYear)
375#undef ECML_STRING_ENTRY
376  };
377
378  const string16 ecom(ASCIIToUTF16("ecom"));
379  for (size_t index = 0; index < num_fields; ++index) {
380    const string16& utf16_name = fields->field(index)->name();
381    if (StartsWith(utf16_name, ecom, true)) {
382      std::string name(UTF16ToASCII(utf16_name));
383      for (size_t i = 0; i < ARRAYSIZE_UNSAFE(form_fields); ++i) {
384        if (base::strncasecmp(name.c_str(), form_fields[i].name_,
385                              form_fields[i].length_) == 0) {
386          return true;
387        }
388      }
389    }
390  }
391
392  return false;
393}
394