form_field.cc revision 21d179b334e59e9a3bfcaed4c4430bef1bc5759d
1// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autofill/form_field.h"
6
7#include "app/l10n_util.h"
8#include "base/string_util.h"
9#include "base/utf_string_conversions.h"
10#include "chrome/browser/autofill/address_field.h"
11#include "chrome/browser/autofill/autofill_field.h"
12#include "chrome/browser/autofill/credit_card_field.h"
13#include "chrome/browser/autofill/fax_field.h"
14#include "chrome/browser/autofill/name_field.h"
15#include "chrome/browser/autofill/phone_field.h"
16#include "third_party/WebKit/WebKit/chromium/public/WebRegularExpression.h"
17#include "third_party/WebKit/WebKit/chromium/public/WebString.h"
18#ifndef ANDROID
19#include "grit/autofill_resources.h"
20#endif
21
// Field names from the ECML specification; see RFC 3106.  We've
// made these names lowercase since we convert labels and field names to
// lowercase before searching.
//
// Each constant holds the lowercase spelling of one ECML field name.  Within
// this file they are used to build "^name" regular-expression patterns (see
// FormField::GetEcmlPattern) and to detect ECML forms (see
// FormFieldSet::CheckECML).
// NOTE(review): these are presumably also declared in form_field.h for the
// other field parsers -- confirm before renaming or removing any of them.

// shipping name/address fields ("ecom_shipto_" prefix)
const char kEcmlShipToTitle[] = "ecom_shipto_postal_name_prefix";
const char kEcmlShipToFirstName[] = "ecom_shipto_postal_name_first";
const char kEcmlShipToMiddleName[] = "ecom_shipto_postal_name_middle";
const char kEcmlShipToLastName[] = "ecom_shipto_postal_name_last";
const char kEcmlShipToNameSuffix[] = "ecom_shipto_postal_name_suffix";
const char kEcmlShipToCompanyName[] = "ecom_shipto_postal_company";
const char kEcmlShipToAddress1[] = "ecom_shipto_postal_street_line1";
const char kEcmlShipToAddress2[] = "ecom_shipto_postal_street_line2";
const char kEcmlShipToAddress3[] = "ecom_shipto_postal_street_line3";
const char kEcmlShipToCity[] = "ecom_shipto_postal_city";
const char kEcmlShipToStateProv[] = "ecom_shipto_postal_stateprov";
const char kEcmlShipToPostalCode[] = "ecom_shipto_postal_postalcode";
const char kEcmlShipToCountry[] = "ecom_shipto_postal_countrycode";
const char kEcmlShipToPhone[] = "ecom_shipto_telecom_phone_number";
const char kEcmlShipToEmail[] = "ecom_shipto_online_email";

// billing name/address fields ("ecom_billto_" prefix); these mirror the
// shipping constants above one-for-one.
const char kEcmlBillToTitle[] = "ecom_billto_postal_name_prefix";
const char kEcmlBillToFirstName[] = "ecom_billto_postal_name_first";
const char kEcmlBillToMiddleName[] = "ecom_billto_postal_name_middle";
const char kEcmlBillToLastName[] = "ecom_billto_postal_name_last";
const char kEcmlBillToNameSuffix[] = "ecom_billto_postal_name_suffix";
const char kEcmlBillToCompanyName[] = "ecom_billto_postal_company";
const char kEcmlBillToAddress1[] = "ecom_billto_postal_street_line1";
const char kEcmlBillToAddress2[] = "ecom_billto_postal_street_line2";
const char kEcmlBillToAddress3[] = "ecom_billto_postal_street_line3";
const char kEcmlBillToCity[] = "ecom_billto_postal_city";
const char kEcmlBillToStateProv[] = "ecom_billto_postal_stateprov";
const char kEcmlBillToPostalCode[] = "ecom_billto_postal_postalcode";
const char kEcmlBillToCountry[] = "ecom_billto_postal_countrycode";
const char kEcmlBillToPhone[] = "ecom_billto_telecom_phone_number";
const char kEcmlBillToEmail[] = "ecom_billto_online_email";

// credit card fields ("ecom_payment_card_" prefix)
const char kEcmlCardHolder[] = "ecom_payment_card_name";
const char kEcmlCardType[] = "ecom_payment_card_type";
const char kEcmlCardNumber[] = "ecom_payment_card_number";
const char kEcmlCardVerification[] = "ecom_payment_card_verification";
const char kEcmlCardExpireDay[] = "ecom_payment_card_expdate_day";
const char kEcmlCardExpireMonth[] = "ecom_payment_card_expdate_month";
const char kEcmlCardExpireYear[] = "ecom_payment_card_expdate_year";
68
namespace {

// Values of a field's form_control_type() that the parsers in this file treat
// specially.  They are compared case-insensitively via LowerCaseEqualsASCII,
// so they must stay lowercase.

// Control type of a hidden form input.
const char kControlTypeHidden[] = "hidden";

// Control type of a radio-button form input.
const char kControlTypeRadio[] = "radio";

// Control type of a checkbox form input.
const char kControlTypeCheckBox[] = "checkbox";

}  // namespace
81
82class EmailField : public FormField {
83 public:
84  virtual bool GetFieldInfo(FieldTypeMap* field_type_map) const {
85    bool ok = Add(field_type_map, field_, AutoFillType(EMAIL_ADDRESS));
86    DCHECK(ok);
87    return true;
88  }
89
90  static EmailField* Parse(std::vector<AutoFillField*>::const_iterator* iter,
91                          bool is_ecml) {
92    string16 pattern;
93    if (is_ecml) {
94      pattern = GetEcmlPattern(kEcmlShipToEmail, kEcmlBillToEmail, '|');
95    } else {
96      pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_EMAIL_RE);
97    }
98
99    AutoFillField* field;
100    if (ParseText(iter, pattern, &field))
101      return new EmailField(field);
102
103    return NULL;
104  }
105
106 private:
107  explicit EmailField(AutoFillField *field) : field_(field) {}
108
109  AutoFillField* field_;
110};
111
112FormFieldType FormField::GetFormFieldType() const {
113  return kOtherFieldType;
114}
115
116// static
117bool FormField::Match(AutoFillField* field,
118                      const string16& pattern,
119                      bool match_label_only) {
120  if (match_label_only) {
121    if (MatchLabel(field, pattern)) {
122      return true;
123    }
124  } else {
125    // For now, we apply the same pattern to the field's label and the field's
126    // name.  Matching the name is a bit of a long shot for many patterns, but
127    // it generally doesn't hurt to try.
128    if (MatchLabel(field, pattern) || MatchName(field, pattern)) {
129      return true;
130    }
131  }
132
133  return false;
134}
135
136// static
137bool FormField::MatchName(AutoFillField* field, const string16& pattern) {
138  // TODO(jhawkins): Remove StringToLowerASCII.  WebRegularExpression needs to
139  // be fixed to take WebTextCaseInsensitive into account.
140  WebKit::WebRegularExpression re(WebKit::WebString(pattern),
141                                  WebKit::WebTextCaseInsensitive);
142  bool match = re.match(
143      WebKit::WebString(StringToLowerASCII(field->name()))) != -1;
144  return match;
145}
146
147// static
148bool FormField::MatchLabel(AutoFillField* field, const string16& pattern) {
149  // TODO(jhawkins): Remove StringToLowerASCII.  WebRegularExpression needs to
150  // be fixed to take WebTextCaseInsensitive into account.
151  WebKit::WebRegularExpression re(WebKit::WebString(pattern),
152                                  WebKit::WebTextCaseInsensitive);
153  bool match = re.match(
154      WebKit::WebString(StringToLowerASCII(field->label()))) != -1;
155  return match;
156}
157
158// static
159FormField* FormField::ParseFormField(
160    std::vector<AutoFillField*>::const_iterator* iter,
161    bool is_ecml) {
162  FormField *field;
163  field = EmailField::Parse(iter, is_ecml);
164  if (field != NULL)
165    return field;
166  field = PhoneField::Parse(iter, is_ecml);
167  if (field != NULL)
168    return field;
169  field = FaxField::Parse(iter);
170  if (field != NULL)
171    return field;
172  field = AddressField::Parse(iter, is_ecml);
173  if (field != NULL)
174    return field;
175  field = CreditCardField::Parse(iter, is_ecml);
176  if (field != NULL)
177    return field;
178
179  // We search for a NameField last since it matches the word "name", which is
180  // relatively general.
181  return NameField::Parse(iter, is_ecml);
182}
183
184// static
185bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
186                          const string16& pattern) {
187  AutoFillField* field;
188  return ParseText(iter, pattern, &field);
189}
190
191// static
192bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
193                          const string16& pattern,
194                          AutoFillField** dest) {
195  return ParseText(iter, pattern, dest, false);
196}
197
198// static
199bool FormField::ParseEmptyText(
200    std::vector<AutoFillField*>::const_iterator* iter,
201    AutoFillField** dest) {
202  return ParseLabelText(iter, ASCIIToUTF16("^$"), dest);
203}
204
205// static
206bool FormField::ParseLabelText(
207    std::vector<AutoFillField*>::const_iterator* iter,
208    const string16& pattern,
209    AutoFillField** dest) {
210  return ParseText(iter, pattern, dest, true);
211}
212
213// static
214bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
215                          const string16& pattern,
216                          AutoFillField** dest,
217                          bool match_label_only) {
218  // Some forms have one or more hidden fields before each visible input; skip
219  // past these.
220  while (**iter && LowerCaseEqualsASCII((**iter)->form_control_type(),
221                                        kControlTypeHidden))
222    (*iter)++;
223
224  AutoFillField* field = **iter;
225  if (!field)
226    return false;
227
228  if (Match(field, pattern, match_label_only)) {
229    if (dest)
230      *dest = field;
231    (*iter)++;
232    return true;
233  }
234
235  return false;
236}
237
238// static
239bool FormField::ParseLabelAndName(
240    std::vector<AutoFillField*>::const_iterator* iter,
241    const string16& pattern,
242    AutoFillField** dest) {
243  AutoFillField* field = **iter;
244  if (!field)
245    return false;
246
247  if (MatchLabel(field, pattern) && MatchName(field, pattern)) {
248    if (dest)
249      *dest = field;
250    (*iter)++;
251    return true;
252  }
253
254  return false;
255}
256
257// static
258bool FormField::ParseEmpty(std::vector<AutoFillField*>::const_iterator* iter) {
259  // TODO(jhawkins): Handle select fields.
260  return ParseLabelAndName(iter, ASCIIToUTF16("^$"), NULL);
261}
262
263// static
264bool FormField::Add(FieldTypeMap* field_type_map, AutoFillField* field,
265               const AutoFillType& type) {
266  // Several fields are optional.
267  if (field)
268    field_type_map->insert(make_pair(field->unique_name(), type.field_type()));
269
270  return true;
271}
272
273string16 FormField::GetEcmlPattern(const char* ecml_name) {
274  return ASCIIToUTF16(std::string("^") + ecml_name);
275}
276
277string16 FormField::GetEcmlPattern(const char* ecml_name1,
278                                   const char* ecml_name2,
279                                   char pattern_operator) {
280  return ASCIIToUTF16(StringPrintf("^%s%c^%s",
281      ecml_name1, pattern_operator, ecml_name2));
282}
283
284FormFieldSet::FormFieldSet(FormStructure* fields) {
285  std::vector<AddressField*> addresses;
286
287  // First, find if there is one form field with an ECML name.  If there is,
288  // then we will match an element only if it is in the standard.
289  bool is_ecml = CheckECML(fields);
290
291  // Parse fields.
292  std::vector<AutoFillField*>::const_iterator field = fields->begin();
293  while (field != fields->end() && *field != NULL) {
294    // Don't parse hidden fields or radio or checkbox controls.
295    if (LowerCaseEqualsASCII((*field)->form_control_type(),
296                             kControlTypeHidden) ||
297        LowerCaseEqualsASCII((*field)->form_control_type(),
298                             kControlTypeRadio) ||
299        LowerCaseEqualsASCII((*field)->form_control_type(),
300                             kControlTypeCheckBox)) {
301      field++;
302      continue;
303    }
304
305    FormField* form_field = FormField::ParseFormField(&field, is_ecml);
306    if (!form_field) {
307      field++;
308      continue;
309    }
310
311    push_back(form_field);
312
313    if (form_field->GetFormFieldType() == kAddressType) {
314      AddressField* address = static_cast<AddressField*>(form_field);
315      if (address->IsFullAddress())
316        addresses.push_back(address);
317    }
318  }
319
320  // Now determine an address type for each address. Note, if this is an ECML
321  // form, then we already got this info from the field names.
322  if (!is_ecml && !addresses.empty()) {
323    if (addresses.size() == 1) {
324      addresses[0]->SetType(addresses[0]->FindType());
325    } else {
326      AddressType type0 = addresses[0]->FindType();
327      AddressType type1 = addresses[1]->FindType();
328
329      // When there are two addresses on a page, they almost always appear in
330      // the order (billing, shipping).
331      bool reversed = (type0 == kShippingAddress && type1 == kBillingAddress);
332      addresses[0]->SetType(reversed ? kShippingAddress : kBillingAddress);
333      addresses[1]->SetType(reversed ? kBillingAddress : kShippingAddress);
334    }
335  }
336}
337
338bool FormFieldSet::CheckECML(FormStructure* fields) {
339  size_t num_fields = fields->field_count();
340  struct EcmlField {
341    const char* name_;
342    const int length_;
343  } form_fields[] = {
344#define ECML_STRING_ENTRY(x) { x, arraysize(x) - 1 },
345    ECML_STRING_ENTRY(kEcmlShipToTitle)
346    ECML_STRING_ENTRY(kEcmlShipToFirstName)
347    ECML_STRING_ENTRY(kEcmlShipToMiddleName)
348    ECML_STRING_ENTRY(kEcmlShipToLastName)
349    ECML_STRING_ENTRY(kEcmlShipToNameSuffix)
350    ECML_STRING_ENTRY(kEcmlShipToCompanyName)
351    ECML_STRING_ENTRY(kEcmlShipToAddress1)
352    ECML_STRING_ENTRY(kEcmlShipToAddress2)
353    ECML_STRING_ENTRY(kEcmlShipToAddress3)
354    ECML_STRING_ENTRY(kEcmlShipToCity)
355    ECML_STRING_ENTRY(kEcmlShipToStateProv)
356    ECML_STRING_ENTRY(kEcmlShipToPostalCode)
357    ECML_STRING_ENTRY(kEcmlShipToCountry)
358    ECML_STRING_ENTRY(kEcmlShipToPhone)
359    ECML_STRING_ENTRY(kEcmlShipToPhone)
360    ECML_STRING_ENTRY(kEcmlShipToEmail)
361    ECML_STRING_ENTRY(kEcmlBillToTitle)
362    ECML_STRING_ENTRY(kEcmlBillToFirstName)
363    ECML_STRING_ENTRY(kEcmlBillToMiddleName)
364    ECML_STRING_ENTRY(kEcmlBillToLastName)
365    ECML_STRING_ENTRY(kEcmlBillToNameSuffix)
366    ECML_STRING_ENTRY(kEcmlBillToCompanyName)
367    ECML_STRING_ENTRY(kEcmlBillToAddress1)
368    ECML_STRING_ENTRY(kEcmlBillToAddress2)
369    ECML_STRING_ENTRY(kEcmlBillToAddress3)
370    ECML_STRING_ENTRY(kEcmlBillToCity)
371    ECML_STRING_ENTRY(kEcmlBillToStateProv)
372    ECML_STRING_ENTRY(kEcmlBillToPostalCode)
373    ECML_STRING_ENTRY(kEcmlBillToCountry)
374    ECML_STRING_ENTRY(kEcmlBillToPhone)
375    ECML_STRING_ENTRY(kEcmlBillToPhone)
376    ECML_STRING_ENTRY(kEcmlBillToEmail)
377    ECML_STRING_ENTRY(kEcmlCardHolder)
378    ECML_STRING_ENTRY(kEcmlCardType)
379    ECML_STRING_ENTRY(kEcmlCardNumber)
380    ECML_STRING_ENTRY(kEcmlCardVerification)
381    ECML_STRING_ENTRY(kEcmlCardExpireMonth)
382    ECML_STRING_ENTRY(kEcmlCardExpireYear)
383#undef ECML_STRING_ENTRY
384  };
385
386  const string16 ecom(ASCIIToUTF16("ecom"));
387  for (size_t index = 0; index < num_fields; ++index) {
388    const string16& utf16_name = fields->field(index)->name();
389    if (StartsWith(utf16_name, ecom, true)) {
390      std::string name(UTF16ToASCII(utf16_name));
391      for (size_t i = 0; i < ARRAYSIZE_UNSAFE(form_fields); ++i) {
392        if (base::strncasecmp(name.c_str(), form_fields[i].name_,
393                              form_fields[i].length_) == 0) {
394          return true;
395        }
396      }
397    }
398  }
399
400  return false;
401}
402