name_field.cc revision 1320f92c476a1ad9d19dba2a48c72b75566198e9
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
48d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
5c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt#include "components/autofill/core/browser/name_field.h"
6c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt
78d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "base/logging.h"
88d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "base/memory/scoped_ptr.h"
98d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "base/strings/string_util.h"
108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "base/strings/utf_string_conversions.h"
118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "components/autofill/core/browser/autofill_regex_constants.h"
128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "components/autofill/core/browser/autofill_scanner.h"
138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "components/autofill/core/browser/autofill_type.h"
148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include "ui/base/l10n/l10n_util.h"
151f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
161f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtusing base::UTF8ToUTF16;
171f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtnamespace autofill {
198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtnamespace {
208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// A form field that can parse a full name field.
228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtclass FullNameField : public NameField {
238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt public:
248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  static FullNameField* Parse(AutofillScanner* scanner);
258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt protected:
278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // FormField:
288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  virtual bool ClassifyField(ServerFieldTypeMap* map) const OVERRIDE;
298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt private:
318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  explicit FullNameField(AutofillField* field);
328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  AutofillField* field_;
348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  DISALLOW_COPY_AND_ASSIGN(FullNameField);
368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt};
378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// A form field that can parse a first and last name field.
398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtclass FirstLastNameField : public NameField {
408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt public:
418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  static FirstLastNameField* ParseSpecificName(AutofillScanner* scanner);
428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  static FirstLastNameField* ParseComponentNames(AutofillScanner* scanner);
438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  static FirstLastNameField* Parse(AutofillScanner* scanner);
448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt protected:
468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // FormField:
478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  virtual bool ClassifyField(ServerFieldTypeMap* map) const OVERRIDE;
488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt private:
508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  FirstLastNameField();
518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  AutofillField* first_name_;
538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  AutofillField* middle_name_;  // Optional.
548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  AutofillField* last_name_;
558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  bool middle_initial_;  // True if middle_name_ is a middle initial.
568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  DISALLOW_COPY_AND_ASSIGN(FirstLastNameField);
588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt};
598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}  // namespace
618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
628d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtFormField* NameField::Parse(AutofillScanner* scanner) {
638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (scanner->IsEnd())
648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return NULL;
658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // Try FirstLastNameField first since it's more specific.
678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  NameField* field = FirstLastNameField::Parse(scanner);
688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  if (!field)
698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    field = FullNameField::Parse(scanner);
708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return field;
718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
721f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt
731f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt// This is overriden in concrete subclasses.
748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtbool NameField::ClassifyField(ServerFieldTypeMap* map) const {
758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  return false;
768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt}
778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt
788d520ff1dc2da35cdca849e982051b86468016d8Dmitry ShmidtFullNameField* FullNameField::Parse(AutofillScanner* scanner) {
798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  // Exclude e.g. "username" or "nickname" fields.
808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  scanner->SaveCursor();
818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt  bool should_ignore = ParseField(scanner,
828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt                                  UTF8ToUTF16(autofill::kNameIgnoredRe), NULL);
831f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  scanner->Rewind();
841f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt  if (should_ignore)
858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt    return NULL;
86
87  // Searching for any label containing the word "name" is too general;
88  // for example, Travelocity_Edit travel profile.html contains a field
89  // "Travel Profile Name".
90  AutofillField* field = NULL;
91  if (ParseField(scanner, UTF8ToUTF16(autofill::kNameRe), &field))
92    return new FullNameField(field);
93
94  return NULL;
95}
96
97bool FullNameField::ClassifyField(ServerFieldTypeMap* map) const {
98  return AddClassification(field_, NAME_FULL, map);
99}
100
101FullNameField::FullNameField(AutofillField* field) : field_(field) {
102}
103
104FirstLastNameField* FirstLastNameField::ParseSpecificName(
105    AutofillScanner* scanner) {
106  // Some pages (e.g. Overstock_comBilling.html, SmithsonianCheckout.html)
107  // have the label "Name" followed by two or three text fields.
108  scoped_ptr<FirstLastNameField> v(new FirstLastNameField);
109  scanner->SaveCursor();
110
111  AutofillField* next = NULL;
112  if (ParseField(scanner,
113                 UTF8ToUTF16(autofill::kNameSpecificRe), &v->first_name_) &&
114      ParseEmptyLabel(scanner, &next)) {
115    if (ParseEmptyLabel(scanner, &v->last_name_)) {
116      // There are three name fields; assume that the middle one is a
117      // middle initial (it is, at least, on SmithsonianCheckout.html).
118      v->middle_name_ = next;
119      v->middle_initial_ = true;
120    } else {  // only two name fields
121      v->last_name_ = next;
122    }
123
124    return v.release();
125  }
126
127  scanner->Rewind();
128  return NULL;
129}
130
131FirstLastNameField* FirstLastNameField::ParseComponentNames(
132    AutofillScanner* scanner) {
133  scoped_ptr<FirstLastNameField> v(new FirstLastNameField);
134  scanner->SaveCursor();
135
136  // A fair number of pages use the names "fname" and "lname" for naming
137  // first and last name fields (examples from the test suite:
138  // BESTBUY_COM - Sign In2.html; Crate and Barrel Check Out.html;
139  // dell_checkout1.html).  At least one UK page (The China Shop2.html)
140  // asks, in stuffy English style, for just initials and a surname,
141  // so we match "initials" here (and just fill in a first name there,
142  // American-style).
143  // The ".*first$" matches fields ending in "first" (example in sample8.html).
144  // The ".*last$" matches fields ending in "last" (example in sample8.html).
145
146  // Allow name fields to appear in any order.
147  while (!scanner->IsEnd()) {
148    // Skip over any unrelated fields, e.g. "username" or "nickname".
149    if (ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kNameIgnoredRe),
150                            MATCH_DEFAULT | MATCH_SELECT, NULL)) {
151          continue;
152    }
153
154    if (!v->first_name_ &&
155        ParseField(scanner, UTF8ToUTF16(autofill::kFirstNameRe),
156                   &v->first_name_)) {
157      continue;
158    }
159
160    // We check for a middle initial before checking for a middle name
161    // because at least one page (PC Connection.html) has a field marked
162    // as both (the label text is "MI" and the element name is
163    // "txtmiddlename"); such a field probably actually represents a
164    // middle initial.
165    if (!v->middle_name_ &&
166        ParseField(scanner, UTF8ToUTF16(autofill::kMiddleInitialRe),
167                   &v->middle_name_)) {
168      v->middle_initial_ = true;
169      continue;
170    }
171
172    if (!v->middle_name_ &&
173        ParseField(scanner, UTF8ToUTF16(autofill::kMiddleNameRe),
174                   &v->middle_name_)) {
175      continue;
176    }
177
178    if (!v->last_name_ &&
179        ParseField(scanner, UTF8ToUTF16(autofill::kLastNameRe),
180                   &v->last_name_)) {
181      continue;
182    }
183
184    break;
185  }
186
187  // Consider the match to be successful if we detected both first and last name
188  // fields.
189  if (v->first_name_ && v->last_name_)
190    return v.release();
191
192  scanner->Rewind();
193  return NULL;
194}
195
196FirstLastNameField* FirstLastNameField::Parse(AutofillScanner* scanner) {
197  FirstLastNameField* field = ParseSpecificName(scanner);
198  if (!field)
199    field = ParseComponentNames(scanner);
200  return field;
201}
202
203FirstLastNameField::FirstLastNameField()
204    : first_name_(NULL),
205      middle_name_(NULL),
206      last_name_(NULL),
207      middle_initial_(false) {
208}
209
210bool FirstLastNameField::ClassifyField(ServerFieldTypeMap* map) const {
211  bool ok = AddClassification(first_name_, NAME_FIRST, map);
212  ok = ok && AddClassification(last_name_, NAME_LAST, map);
213  ServerFieldType type = middle_initial_ ? NAME_MIDDLE_INITIAL : NAME_MIDDLE;
214  ok = ok && AddClassification(middle_name_, type, map);
215  return ok;
216}
217
218}  // namespace autofill
219