form_field.cc revision 7375b0cf763978c2988cd0784818fbedd11ce85a
1// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autofill/form_field.h"
6
7#include "app/l10n_util.h"
8#include "base/string_util.h"
9#include "base/utf_string_conversions.h"
10#include "chrome/browser/autofill/address_field.h"
11#include "chrome/browser/autofill/autofill_field.h"
12#include "chrome/browser/autofill/credit_card_field.h"
13#include "chrome/browser/autofill/fax_field.h"
14#include "chrome/browser/autofill/name_field.h"
15#include "chrome/browser/autofill/phone_field.h"
16#include "third_party/WebKit/WebKit/chromium/public/WebRegularExpression.h"
17#include "third_party/WebKit/WebKit/chromium/public/WebString.h"
18#include "grit/autofill_resources.h"
19
20// Field names from the ECML specification; see RFC 3106.  We've
21// made these names lowercase since we convert labels and field names to
22// lowercase before searching.
23
// ECML names for the ship-to name, postal address, phone and e-mail fields.
const char kEcmlShipToTitle[] = "ecom_shipto_postal_name_prefix";
const char kEcmlShipToFirstName[] = "ecom_shipto_postal_name_first";
const char kEcmlShipToMiddleName[] = "ecom_shipto_postal_name_middle";
const char kEcmlShipToLastName[] = "ecom_shipto_postal_name_last";
const char kEcmlShipToNameSuffix[] = "ecom_shipto_postal_name_suffix";
const char kEcmlShipToCompanyName[] = "ecom_shipto_postal_company";
const char kEcmlShipToAddress1[] = "ecom_shipto_postal_street_line1";
const char kEcmlShipToAddress2[] = "ecom_shipto_postal_street_line2";
const char kEcmlShipToAddress3[] = "ecom_shipto_postal_street_line3";
const char kEcmlShipToCity[] = "ecom_shipto_postal_city";
const char kEcmlShipToStateProv[] = "ecom_shipto_postal_stateprov";
const char kEcmlShipToPostalCode[] = "ecom_shipto_postal_postalcode";
const char kEcmlShipToCountry[] = "ecom_shipto_postal_countrycode";
const char kEcmlShipToPhone[] = "ecom_shipto_telecom_phone_number";
const char kEcmlShipToEmail[] = "ecom_shipto_online_email";

// ECML names for the bill-to name, postal address, phone and e-mail fields.
// Each entry mirrors the ship-to constant of the same suffix.
const char kEcmlBillToTitle[] = "ecom_billto_postal_name_prefix";
const char kEcmlBillToFirstName[] = "ecom_billto_postal_name_first";
const char kEcmlBillToMiddleName[] = "ecom_billto_postal_name_middle";
const char kEcmlBillToLastName[] = "ecom_billto_postal_name_last";
const char kEcmlBillToNameSuffix[] = "ecom_billto_postal_name_suffix";
const char kEcmlBillToCompanyName[] = "ecom_billto_postal_company";
const char kEcmlBillToAddress1[] = "ecom_billto_postal_street_line1";
const char kEcmlBillToAddress2[] = "ecom_billto_postal_street_line2";
const char kEcmlBillToAddress3[] = "ecom_billto_postal_street_line3";
const char kEcmlBillToCity[] = "ecom_billto_postal_city";
const char kEcmlBillToStateProv[] = "ecom_billto_postal_stateprov";
const char kEcmlBillToPostalCode[] = "ecom_billto_postal_postalcode";
const char kEcmlBillToCountry[] = "ecom_billto_postal_countrycode";
const char kEcmlBillToPhone[] = "ecom_billto_telecom_phone_number";
const char kEcmlBillToEmail[] = "ecom_billto_online_email";

// ECML names for the payment card fields.
const char kEcmlCardHolder[] = "ecom_payment_card_name";
const char kEcmlCardType[] = "ecom_payment_card_type";
const char kEcmlCardNumber[] = "ecom_payment_card_number";
const char kEcmlCardVerification[] = "ecom_payment_card_verification";
const char kEcmlCardExpireDay[] = "ecom_payment_card_expdate_day";
const char kEcmlCardExpireMonth[] = "ecom_payment_card_expdate_month";
const char kEcmlCardExpireYear[] = "ecom_payment_card_expdate_year";
66
namespace {

// Form control type strings that the parsing code in this file compares
// against AutoFillField::form_control_type().

// Control type of a hidden input.
const char kControlTypeHidden[] = "hidden";

// Control type of a radio button.
const char kControlTypeRadio[] = "radio";

// Control type of a checkbox.
const char kControlTypeCheckBox[] = "checkbox";

}  // namespace
79
80class EmailField : public FormField {
81 public:
82  virtual bool GetFieldInfo(FieldTypeMap* field_type_map) const {
83    bool ok = Add(field_type_map, field_, AutoFillType(EMAIL_ADDRESS));
84    DCHECK(ok);
85    return true;
86  }
87
88  static EmailField* Parse(std::vector<AutoFillField*>::const_iterator* iter,
89                          bool is_ecml) {
90    string16 pattern;
91    if (is_ecml) {
92      pattern = GetEcmlPattern(kEcmlShipToEmail, kEcmlBillToEmail, '|');
93    } else {
94      pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_EMAIL_RE);
95    }
96
97    AutoFillField* field;
98    if (ParseText(iter, pattern, &field))
99      return new EmailField(field);
100
101    return NULL;
102  }
103
104 private:
105  explicit EmailField(AutoFillField *field) : field_(field) {}
106
107  AutoFillField* field_;
108};
109
110FormFieldType FormField::GetFormFieldType() const {
111  return kOtherFieldType;
112}
113
114// static
115bool FormField::Match(AutoFillField* field,
116                      const string16& pattern,
117                      bool match_label_only) {
118  if (match_label_only) {
119    if (MatchLabel(field, pattern)) {
120      return true;
121    }
122  } else {
123    // For now, we apply the same pattern to the field's label and the field's
124    // name.  Matching the name is a bit of a long shot for many patterns, but
125    // it generally doesn't hurt to try.
126    if (MatchLabel(field, pattern) || MatchName(field, pattern)) {
127      return true;
128    }
129  }
130
131  return false;
132}
133
134// static
135bool FormField::MatchName(AutoFillField* field, const string16& pattern) {
136  // TODO(jhawkins): Remove StringToLowerASCII.  WebRegularExpression needs to
137  // be fixed to take WebTextCaseInsensitive into account.
138  WebKit::WebRegularExpression re(WebKit::WebString(pattern),
139                                  WebKit::WebTextCaseInsensitive);
140  bool match = re.match(
141      WebKit::WebString(StringToLowerASCII(field->name()))) != -1;
142  return match;
143}
144
145// static
146bool FormField::MatchLabel(AutoFillField* field, const string16& pattern) {
147  // TODO(jhawkins): Remove StringToLowerASCII.  WebRegularExpression needs to
148  // be fixed to take WebTextCaseInsensitive into account.
149  WebKit::WebRegularExpression re(WebKit::WebString(pattern),
150                                  WebKit::WebTextCaseInsensitive);
151  bool match = re.match(
152      WebKit::WebString(StringToLowerASCII(field->label()))) != -1;
153  return match;
154}
155
156// static
157FormField* FormField::ParseFormField(
158    std::vector<AutoFillField*>::const_iterator* iter,
159    bool is_ecml) {
160  FormField *field;
161  field = EmailField::Parse(iter, is_ecml);
162  if (field != NULL)
163    return field;
164  field = PhoneField::Parse(iter, is_ecml);
165  if (field != NULL)
166    return field;
167  field = FaxField::Parse(iter);
168  if (field != NULL)
169    return field;
170  field = AddressField::Parse(iter, is_ecml);
171  if (field != NULL)
172    return field;
173  field = CreditCardField::Parse(iter, is_ecml);
174  if (field != NULL)
175    return field;
176
177  // We search for a NameField last since it matches the word "name", which is
178  // relatively general.
179  return NameField::Parse(iter, is_ecml);
180}
181
182// static
183bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
184                          const string16& pattern) {
185  AutoFillField* field;
186  return ParseText(iter, pattern, &field);
187}
188
189// static
190bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
191                          const string16& pattern,
192                          AutoFillField** dest) {
193  return ParseText(iter, pattern, dest, false);
194}
195
196// static
197bool FormField::ParseEmptyText(
198    std::vector<AutoFillField*>::const_iterator* iter,
199    AutoFillField** dest) {
200  return ParseLabelText(iter, ASCIIToUTF16("^$"), dest);
201}
202
203// static
204bool FormField::ParseLabelText(
205    std::vector<AutoFillField*>::const_iterator* iter,
206    const string16& pattern,
207    AutoFillField** dest) {
208  return ParseText(iter, pattern, dest, true);
209}
210
211// static
212bool FormField::ParseText(std::vector<AutoFillField*>::const_iterator* iter,
213                          const string16& pattern,
214                          AutoFillField** dest,
215                          bool match_label_only) {
216  // Some forms have one or more hidden fields before each visible input; skip
217  // past these.
218  while (**iter && LowerCaseEqualsASCII((**iter)->form_control_type(),
219                                        kControlTypeHidden))
220    (*iter)++;
221
222  AutoFillField* field = **iter;
223  if (!field)
224    return false;
225
226  if (Match(field, pattern, match_label_only)) {
227    if (dest)
228      *dest = field;
229    (*iter)++;
230    return true;
231  }
232
233  return false;
234}
235
236// static
237bool FormField::ParseLabelAndName(
238    std::vector<AutoFillField*>::const_iterator* iter,
239    const string16& pattern,
240    AutoFillField** dest) {
241  AutoFillField* field = **iter;
242  if (!field)
243    return false;
244
245  if (MatchLabel(field, pattern) && MatchName(field, pattern)) {
246    if (dest)
247      *dest = field;
248    (*iter)++;
249    return true;
250  }
251
252  return false;
253}
254
255// static
256bool FormField::ParseEmpty(std::vector<AutoFillField*>::const_iterator* iter) {
257  // TODO(jhawkins): Handle select fields.
258  return ParseLabelAndName(iter, ASCIIToUTF16("^$"), NULL);
259}
260
261// static
262bool FormField::Add(FieldTypeMap* field_type_map, AutoFillField* field,
263               const AutoFillType& type) {
264  // Several fields are optional.
265  if (field)
266    field_type_map->insert(make_pair(field->unique_name(), type.field_type()));
267
268  return true;
269}
270
271string16 FormField::GetEcmlPattern(const char* ecml_name) {
272  return ASCIIToUTF16(std::string("^") + ecml_name);
273}
274
275string16 FormField::GetEcmlPattern(const char* ecml_name1,
276                                   const char* ecml_name2,
277                                   char pattern_operator) {
278  return ASCIIToUTF16(StringPrintf("^%s%c^%s",
279      ecml_name1, pattern_operator, ecml_name2));
280}
281
282FormFieldSet::FormFieldSet(FormStructure* fields) {
283  std::vector<AddressField*> addresses;
284
285  // First, find if there is one form field with an ECML name.  If there is,
286  // then we will match an element only if it is in the standard.
287  bool is_ecml = CheckECML(fields);
288
289  // Parse fields.
290  std::vector<AutoFillField*>::const_iterator field = fields->begin();
291  while (field != fields->end() && *field != NULL) {
292    // Don't parse hidden fields or radio or checkbox controls.
293    if (LowerCaseEqualsASCII((*field)->form_control_type(),
294                             kControlTypeHidden) ||
295        LowerCaseEqualsASCII((*field)->form_control_type(),
296                             kControlTypeRadio) ||
297        LowerCaseEqualsASCII((*field)->form_control_type(),
298                             kControlTypeCheckBox)) {
299      field++;
300      continue;
301    }
302
303    FormField* form_field = FormField::ParseFormField(&field, is_ecml);
304    if (!form_field) {
305      field++;
306      continue;
307    }
308
309    push_back(form_field);
310
311    if (form_field->GetFormFieldType() == kAddressType) {
312      AddressField* address = static_cast<AddressField*>(form_field);
313      if (address->IsFullAddress())
314        addresses.push_back(address);
315    }
316  }
317
318  // Now determine an address type for each address. Note, if this is an ECML
319  // form, then we already got this info from the field names.
320  if (!is_ecml && !addresses.empty()) {
321    if (addresses.size() == 1) {
322      addresses[0]->SetType(addresses[0]->FindType());
323    } else {
324      AddressType type0 = addresses[0]->FindType();
325      AddressType type1 = addresses[1]->FindType();
326
327      // When there are two addresses on a page, they almost always appear in
328      // the order (billing, shipping).
329      bool reversed = (type0 == kShippingAddress && type1 == kBillingAddress);
330      addresses[0]->SetType(reversed ? kShippingAddress : kBillingAddress);
331      addresses[1]->SetType(reversed ? kBillingAddress : kShippingAddress);
332    }
333  }
334}
335
336bool FormFieldSet::CheckECML(FormStructure* fields) {
337  size_t num_fields = fields->field_count();
338  struct EcmlField {
339    const char* name_;
340    const int length_;
341  } form_fields[] = {
342#define ECML_STRING_ENTRY(x) { x, arraysize(x) - 1 },
343    ECML_STRING_ENTRY(kEcmlShipToTitle)
344    ECML_STRING_ENTRY(kEcmlShipToFirstName)
345    ECML_STRING_ENTRY(kEcmlShipToMiddleName)
346    ECML_STRING_ENTRY(kEcmlShipToLastName)
347    ECML_STRING_ENTRY(kEcmlShipToNameSuffix)
348    ECML_STRING_ENTRY(kEcmlShipToCompanyName)
349    ECML_STRING_ENTRY(kEcmlShipToAddress1)
350    ECML_STRING_ENTRY(kEcmlShipToAddress2)
351    ECML_STRING_ENTRY(kEcmlShipToAddress3)
352    ECML_STRING_ENTRY(kEcmlShipToCity)
353    ECML_STRING_ENTRY(kEcmlShipToStateProv)
354    ECML_STRING_ENTRY(kEcmlShipToPostalCode)
355    ECML_STRING_ENTRY(kEcmlShipToCountry)
356    ECML_STRING_ENTRY(kEcmlShipToPhone)
357    ECML_STRING_ENTRY(kEcmlShipToPhone)
358    ECML_STRING_ENTRY(kEcmlShipToEmail)
359    ECML_STRING_ENTRY(kEcmlBillToTitle)
360    ECML_STRING_ENTRY(kEcmlBillToFirstName)
361    ECML_STRING_ENTRY(kEcmlBillToMiddleName)
362    ECML_STRING_ENTRY(kEcmlBillToLastName)
363    ECML_STRING_ENTRY(kEcmlBillToNameSuffix)
364    ECML_STRING_ENTRY(kEcmlBillToCompanyName)
365    ECML_STRING_ENTRY(kEcmlBillToAddress1)
366    ECML_STRING_ENTRY(kEcmlBillToAddress2)
367    ECML_STRING_ENTRY(kEcmlBillToAddress3)
368    ECML_STRING_ENTRY(kEcmlBillToCity)
369    ECML_STRING_ENTRY(kEcmlBillToStateProv)
370    ECML_STRING_ENTRY(kEcmlBillToPostalCode)
371    ECML_STRING_ENTRY(kEcmlBillToCountry)
372    ECML_STRING_ENTRY(kEcmlBillToPhone)
373    ECML_STRING_ENTRY(kEcmlBillToPhone)
374    ECML_STRING_ENTRY(kEcmlBillToEmail)
375    ECML_STRING_ENTRY(kEcmlCardHolder)
376    ECML_STRING_ENTRY(kEcmlCardType)
377    ECML_STRING_ENTRY(kEcmlCardNumber)
378    ECML_STRING_ENTRY(kEcmlCardVerification)
379    ECML_STRING_ENTRY(kEcmlCardExpireMonth)
380    ECML_STRING_ENTRY(kEcmlCardExpireYear)
381#undef ECML_STRING_ENTRY
382  };
383
384  const string16 ecom(ASCIIToUTF16("ecom"));
385  for (size_t index = 0; index < num_fields; ++index) {
386    const string16& utf16_name = fields->field(index)->name();
387    if (StartsWith(utf16_name, ecom, true)) {
388      std::string name(UTF16ToASCII(utf16_name));
389      for (size_t i = 0; i < ARRAYSIZE_UNSAFE(form_fields); ++i) {
390        if (base::strncasecmp(name.c_str(), form_fields[i].name_,
391                              form_fields[i].length_) == 0) {
392          return true;
393        }
394      }
395    }
396  }
397
398  return false;
399}
400