address_field.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/autofill/core/browser/address_field.h" 6 7#include <stddef.h> 8 9#include "base/logging.h" 10#include "base/memory/scoped_ptr.h" 11#include "base/strings/string16.h" 12#include "base/strings/string_util.h" 13#include "base/strings/utf_string_conversions.h" 14#include "components/autofill/core/browser/autofill_field.h" 15#include "components/autofill/core/browser/autofill_regex_constants.h" 16#include "components/autofill/core/browser/autofill_scanner.h" 17#include "components/autofill/core/browser/field_types.h" 18#include "ui/base/l10n/l10n_util.h" 19 20using base::UTF8ToUTF16; 21 22namespace autofill { 23 24FormField* AddressField::Parse(AutofillScanner* scanner) { 25 if (scanner->IsEnd()) 26 return NULL; 27 28 scoped_ptr<AddressField> address_field(new AddressField); 29 const AutofillField* const initial_field = scanner->Cursor(); 30 size_t saved_cursor = scanner->SaveCursor(); 31 32 base::string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe); 33 base::string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe); 34 35 // Allow address fields to appear in any order. 36 size_t begin_trailing_non_labeled_fields = 0; 37 bool has_trailing_non_labeled_fields = false; 38 while (!scanner->IsEnd()) { 39 const size_t cursor = scanner->SaveCursor(); 40 if (address_field->ParseAddressLines(scanner) || 41 address_field->ParseCity(scanner) || 42 address_field->ParseState(scanner) || 43 address_field->ParseZipCode(scanner) || 44 address_field->ParseCountry(scanner) || 45 address_field->ParseCompany(scanner)) { 46 has_trailing_non_labeled_fields = false; 47 continue; 48 } else if (ParseField(scanner, attention_ignored, NULL) || 49 ParseField(scanner, region_ignored, NULL)) { 50 // We ignore the following: 51 // * Attention. 52 // * Province/Region/Other. 53 continue; 54 } else if (scanner->Cursor() != initial_field && 55 ParseEmptyLabel(scanner, NULL)) { 56 // Ignore non-labeled fields within an address; the page 57 // MapQuest Driving Directions North America.html contains such a field. 58 // We only ignore such fields after we've parsed at least one other field; 59 // otherwise we'd effectively parse address fields before other field 60 // types after any non-labeled fields, and we want email address fields to 61 // have precedence since some pages contain fields labeled 62 // "Email address". 63 if (!has_trailing_non_labeled_fields) { 64 has_trailing_non_labeled_fields = true; 65 begin_trailing_non_labeled_fields = cursor; 66 } 67 68 continue; 69 } else { 70 // No field found. 71 break; 72 } 73 } 74 75 // If we have identified any address fields in this field then it should be 76 // added to the list of fields. 77 if (address_field->company_ || 78 address_field->address1_ || 79 address_field->address2_ || 80 address_field->street_address_ || 81 address_field->city_ || 82 address_field->state_ || 83 address_field->zip_ || 84 address_field->zip4_ || 85 address_field->country_) { 86 // Don't slurp non-labeled fields at the end into the address. 87 if (has_trailing_non_labeled_fields) 88 scanner->RewindTo(begin_trailing_non_labeled_fields); 89 90 return address_field.release(); 91 } 92 93 scanner->RewindTo(saved_cursor); 94 return NULL; 95} 96 97AddressField::AddressField() 98 : company_(NULL), 99 address1_(NULL), 100 address2_(NULL), 101 street_address_(NULL), 102 city_(NULL), 103 state_(NULL), 104 zip_(NULL), 105 zip4_(NULL), 106 country_(NULL) { 107} 108 109bool AddressField::ClassifyField(ServerFieldTypeMap* map) const { 110 // The page can request the address lines as a single textarea input or as 111 // multiple text fields (or not at all), but it shouldn't be possible to 112 // request both. 113 DCHECK(!(address1_ && street_address_)); 114 DCHECK(!(address2_ && street_address_)); 115 116 return AddClassification(company_, COMPANY_NAME, map) && 117 AddClassification(address1_, ADDRESS_HOME_LINE1, map) && 118 AddClassification(address2_, ADDRESS_HOME_LINE2, map) && 119 AddClassification(street_address_, ADDRESS_HOME_STREET_ADDRESS, map) && 120 AddClassification(city_, ADDRESS_HOME_CITY, map) && 121 AddClassification(state_, ADDRESS_HOME_STATE, map) && 122 AddClassification(zip_, ADDRESS_HOME_ZIP, map) && 123 AddClassification(country_, ADDRESS_HOME_COUNTRY, map); 124} 125 126bool AddressField::ParseCompany(AutofillScanner* scanner) { 127 if (company_ && !company_->IsEmpty()) 128 return false; 129 130 return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), &company_); 131} 132 133bool AddressField::ParseAddressLines(AutofillScanner* scanner) { 134 // We only match the string "address" in page text, not in element names, 135 // because sometimes every element in a group of address fields will have 136 // a name containing the string "address"; for example, on the page 137 // Kohl's - Register Billing Address.html the text element labeled "city" 138 // has the name "BILL_TO_ADDRESS<>city". We do match address labels 139 // such as "address1", which appear as element names on various pages (eg 140 // AmericanGirl-Registration.html, BloomingdalesBilling.html, 141 // EBay Registration Enter Information.html). 142 if (address1_ || street_address_) 143 return false; 144 145 base::string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re); 146 base::string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe); 147 if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) && 148 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, 149 &address1_) && 150 !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA, 151 &street_address_) && 152 !ParseFieldSpecifics(scanner, label_pattern, 153 MATCH_LABEL | MATCH_TEXT_AREA, 154 &street_address_)) { 155 return false; 156 } 157 158 // Optionally parse more address lines, which may have empty labels. 159 pattern = UTF8ToUTF16(autofill::kAddressLine2Re); 160 label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe); 161 if (!street_address_ && 162 !ParseEmptyLabel(scanner, &address2_) && 163 !ParseField(scanner, pattern, &address2_)) { 164 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, 165 &address2_); 166 } 167 168 // Try for surplus lines, which we will promptly discard. 169 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) 170 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! 171 if (address2_) { 172 pattern = UTF8ToUTF16(autofill::kAddressLinesExtraRe); 173 while (ParseField(scanner, pattern, NULL)) { 174 // Consumed a surplus line, try for another. 175 } 176 } 177 178 return true; 179} 180 181bool AddressField::ParseCountry(AutofillScanner* scanner) { 182 // Parse a country. The occasional page (e.g. 183 // Travelocity_New Member Information1.html) calls this a "location". 184 if (country_ && !country_->IsEmpty()) 185 return false; 186 187 return ParseFieldSpecifics(scanner, 188 UTF8ToUTF16(autofill::kCountryRe), 189 MATCH_DEFAULT | MATCH_SELECT, 190 &country_); 191} 192 193bool AddressField::ParseZipCode(AutofillScanner* scanner) { 194 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this 195 // is called a "post code". 196 if (zip_) 197 return false; 198 199 base::string16 pattern = UTF8ToUTF16(autofill::kZipCodeRe); 200 if (!ParseField(scanner, pattern, &zip_)) 201 return false; 202 203 // Look for a zip+4, whose field name will also often contain 204 // the substring "zip". 205 ParseField(scanner, UTF8ToUTF16(autofill::kZip4Re), &zip4_); 206 return true; 207} 208 209bool AddressField::ParseCity(AutofillScanner* scanner) { 210 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use 211 // the term "town". 212 if (city_) 213 return false; 214 215 // Select fields are allowed here. This occurs on top-100 site rediff.com. 216 return ParseFieldSpecifics(scanner, 217 UTF8ToUTF16(autofill::kCityRe), 218 MATCH_DEFAULT | MATCH_SELECT, 219 &city_); 220} 221 222bool AddressField::ParseState(AutofillScanner* scanner) { 223 if (state_) 224 return false; 225 226 return ParseFieldSpecifics(scanner, 227 UTF8ToUTF16(autofill::kStateRe), 228 MATCH_DEFAULT | MATCH_SELECT, 229 &state_); 230} 231 232} // namespace autofill 233