credit_card_field.cc revision 201ade2fbba22bfb27ae029f4d23fca6ded109a0
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autofill/credit_card_field.h"
6
7#include "base/scoped_ptr.h"
8#include "base/string16.h"
9#include "base/utf_string_conversions.h"
10#include "chrome/browser/autofill/autofill_field.h"
11
12bool CreditCardField::GetFieldInfo(FieldTypeMap* field_type_map) const {
13  bool ok = Add(field_type_map, number_, AutoFillType(CREDIT_CARD_NUMBER));
14  DCHECK(ok);
15
16  // If the heuristics detected first and last name in separate fields,
17  // then ignore both fields. Putting them into separate fields is probably
18  // wrong, because the credit card can also contain a middle name or middle
19  // initial.
20  if (cardholder_last_ == NULL) {
21    // Add() will check if cardholder_ is != NULL.
22    ok = ok && Add(field_type_map, cardholder_, AutoFillType(CREDIT_CARD_NAME));
23    DCHECK(ok);
24  }
25
26  ok = ok && Add(field_type_map, type_, AutoFillType(CREDIT_CARD_TYPE));
27  DCHECK(ok);
28  ok = ok && Add(field_type_map, expiration_month_,
29      AutoFillType(CREDIT_CARD_EXP_MONTH));
30  DCHECK(ok);
31  ok = ok && Add(field_type_map, expiration_year_,
32      AutoFillType(CREDIT_CARD_EXP_4_DIGIT_YEAR));
33  DCHECK(ok);
34
35  return ok;
36}
37
38FormFieldType CreditCardField::GetFormFieldType() const {
39  return kCreditCardType;
40}
41
42// static
43CreditCardField* CreditCardField::Parse(
44    std::vector<AutoFillField*>::const_iterator* iter,
45    bool is_ecml) {
46  scoped_ptr<CreditCardField> credit_card_field(new CreditCardField);
47  std::vector<AutoFillField*>::const_iterator q = *iter;
48  string16 pattern;
49
50  // Credit card fields can appear in many different orders.
51  // We loop until no more credit card related fields are found, see |break| at
52  // bottom of the loop.
53  for (int fields = 0; true; ++fields) {
54    // Sometimes the cardholder field is just labeled "name". Unfortunately this
55    // is a dangerously generic word to search for, since it will often match a
56    // name (not cardholder name) field before or after credit card fields. So
57    // we search for "name" only when we've already parsed at least one other
58    // credit card field and haven't yet parsed the expiration date (which
59    // usually appears at the end).
60    if (credit_card_field->cardholder_ == NULL) {
61      string16 name_pattern;
62      if (is_ecml) {
63        name_pattern = GetEcmlPattern(kEcmlCardHolder);
64      } else {
65        if (fields == 0 || credit_card_field->expiration_month_) {
66          // at beginning or end
67          name_pattern = ASCIIToUTF16("card holder|name on card|nameoncard");
68        } else {
69          name_pattern = ASCIIToUTF16("name");
70        }
71      }
72
73      if (ParseText(&q, name_pattern, &credit_card_field->cardholder_))
74        continue;
75
76      // As a hard-coded hack for Expedia's billing pages (expedia_checkout.html
77      // and ExpediaBilling.html in our test suite), recognize separate fields
78      // for the cardholder's first and last name if they have the labels "cfnm"
79      // and "clnm".
80      std::vector<AutoFillField*>::const_iterator p = q;
81      AutoFillField* first;
82      if (!is_ecml && ParseText(&p, ASCIIToUTF16("^cfnm"), &first) &&
83          ParseText(&p, ASCIIToUTF16("^clnm"),
84                    &credit_card_field->cardholder_last_)) {
85        credit_card_field->cardholder_ = first;
86        q = p;
87        continue;
88      }
89    }
90
91    // We look for a card security code before we look for a credit
92    // card number and match the general term "number".  The security code
93    // has a plethora of names; we've seen "verification #",
94    // "verification number", "card identification number" and others listed
95    // in the |pattern| below.
96    if (is_ecml) {
97      pattern = GetEcmlPattern(kEcmlCardVerification);
98    } else {
99      pattern = ASCIIToUTF16("verification|card identification|cvn|"
100                             "security code|cvv code|cvc");
101    }
102
103    if (credit_card_field->verification_ == NULL &&
104        ParseText(&q, pattern, &credit_card_field->verification_))
105      continue;
106
107    // TODO(jhawkins): Parse the type select control.
108
109    if (is_ecml)
110      pattern = GetEcmlPattern(kEcmlCardNumber);
111    else
112      pattern = ASCIIToUTF16("number|card #|card no.|card_number|card number");
113
114    if (credit_card_field->number_ == NULL && ParseText(&q, pattern,
115        &credit_card_field->number_))
116      continue;
117
118    // "Expiration date" is the most common label here, but some pages have
119    // "Expires", "exp. date" or "exp. month" and "exp. year".  We also look for
120    // the field names ccmonth and ccyear, which appear on at least 4 of our
121    // test pages.
122    //
123    // -> On at least one page (The China Shop2.html) we find only the labels
124    // "month" and "year".  So for now we match these words directly; we'll
125    // see if this turns out to be too general.
126    //
127    // Toolbar Bug 51451: indeed, simply matching "month" is too general for
128    //   https://rps.fidelity.com/ftgw/rps/RtlCust/CreatePIN/Init.
129    // Instead, we match only words beginning with "month".
130    if (is_ecml)
131      pattern = GetEcmlPattern(kEcmlCardExpireMonth);
132    else
133      pattern = ASCIIToUTF16("expir|exp.*month|exp date|ccmonth");
134
135    if ((!credit_card_field->expiration_month_ ||
136        credit_card_field->expiration_month_->IsEmpty()) &&
137        ParseText(&q, pattern, &credit_card_field->expiration_month_)) {
138      if (is_ecml)
139        pattern = GetEcmlPattern(kEcmlCardExpireYear);
140      else
141        pattern = ASCIIToUTF16("|exp|^/|ccyear|year");
142
143      if (!ParseText(&q, pattern, &credit_card_field->expiration_year_))
144        return NULL;
145
146      continue;
147    }
148
149    if (ParseText(&q, GetEcmlPattern(kEcmlCardExpireDay)))
150      continue;
151
152    // Some pages (e.g. ExpediaBilling.html) have a "card description"
153    // field; we parse this field but ignore it.
154    // We also ignore any other fields within a credit card block that
155    // start with "card", under the assumption that they are related to
156    // the credit card section being processed but are uninteresting to us.
157    if (ParseText(&q, ASCIIToUTF16("^card")))
158      continue;
159
160    break;
161  }
162
163  // Some pages have a billing address field after the cardholder name field.
164  // For that case, allow only just the cardholder name field.  The remaining
165  // CC fields will be picked up in a following CreditCardField.
166  if (credit_card_field->cardholder_) {
167    *iter = q;
168    return credit_card_field.release();
169  }
170
171  // On some pages, the user selects a card type using radio buttons
172  // (e.g. test page Apple Store Billing.html).  We can't handle that yet,
173  // so we treat the card type as optional for now.
174  // The existence of a number or cvc in combination with expiration date is
175  // a strong enough signal that this is a credit card.  It is possible that
176  // the number and name were parsed in a separate part of the form.  So if
177  // the cvc and date were found independently they are returned.
178  if ((credit_card_field->number_ || credit_card_field->verification_) &&
179      credit_card_field->expiration_month_ &&
180      credit_card_field->expiration_year_) {
181      *iter = q;
182      return credit_card_field.release();
183  }
184
185  return NULL;
186}
187
188CreditCardField::CreditCardField()
189    : cardholder_(NULL),
190      cardholder_last_(NULL),
191      type_(NULL),
192      number_(NULL),
193      verification_(NULL),
194      expiration_month_(NULL),
195      expiration_year_(NULL) {
196}
197