address_field.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autofill/address_field.h" 6 7#include "base/logging.h" 8#include "base/scoped_ptr.h" 9#include "base/string16.h" 10#include "base/string_util.h" 11#include "chrome/browser/autofill/autofill_field.h" 12 13bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const { 14 AutoFillFieldType address_company; 15 AutoFillFieldType address_line1; 16 AutoFillFieldType address_line2; 17 AutoFillFieldType address_appt_num; 18 AutoFillFieldType address_city; 19 AutoFillFieldType address_state; 20 AutoFillFieldType address_zip; 21 AutoFillFieldType address_country; 22 23 switch (type_) { 24 case kShippingAddress: 25 // Fall through. AutoFill does not support shipping addresses. 26 case kGenericAddress: 27 address_company = COMPANY_NAME; 28 address_line1 = ADDRESS_HOME_LINE1; 29 address_line2 = ADDRESS_HOME_LINE2; 30 address_appt_num = ADDRESS_HOME_APT_NUM; 31 address_city = ADDRESS_HOME_CITY; 32 address_state = ADDRESS_HOME_STATE; 33 address_zip = ADDRESS_HOME_ZIP; 34 address_country = ADDRESS_HOME_COUNTRY; 35 break; 36 37 case kBillingAddress: 38 address_company = COMPANY_NAME; 39 address_line1 = ADDRESS_BILLING_LINE1; 40 address_line2 = ADDRESS_BILLING_LINE2; 41 address_appt_num = ADDRESS_BILLING_APT_NUM; 42 address_city = ADDRESS_BILLING_CITY; 43 address_state = ADDRESS_BILLING_STATE; 44 address_zip = ADDRESS_BILLING_ZIP; 45 address_country = ADDRESS_BILLING_COUNTRY; 46 break; 47 48 default: 49 NOTREACHED(); 50 return false; 51 } 52 53 bool ok; 54 ok = Add(field_type_map, company_, AutoFillType(address_company)); 55 DCHECK(ok); 56 ok = ok && Add(field_type_map, address1_, AutoFillType(address_line1)); 57 DCHECK(ok); 58 ok = ok && Add(field_type_map, address2_, AutoFillType(address_line2)); 59 DCHECK(ok); 60 ok = ok && Add(field_type_map, city_, AutoFillType(address_city)); 61 DCHECK(ok); 62 ok = ok && Add(field_type_map, state_, AutoFillType(address_state)); 63 DCHECK(ok); 64 ok = ok && Add(field_type_map, zip_, AutoFillType(address_zip)); 65 DCHECK(ok); 66 ok = ok && Add(field_type_map, country_, AutoFillType(address_country)); 67 DCHECK(ok); 68 69 return ok; 70} 71 72AddressField* AddressField::Parse( 73 std::vector<AutoFillField*>::const_iterator* iter, 74 bool is_ecml) { 75 DCHECK(iter); 76 if (!iter) 77 return NULL; 78 79 scoped_ptr<AddressField> address_field(new AddressField); 80 std::vector<AutoFillField*>::const_iterator q = *iter; 81 string16 pattern; 82 83 // The ECML standard uses 2 letter country codes. So we will 84 // have to remember that this is an ECML form, for when we fill 85 // it out. 86 address_field->is_ecml_ = is_ecml; 87 88 // Allow address fields to appear in any order. 89 while (true) { 90 if (ParseCompany(&q, is_ecml, address_field.get()) || 91 ParseAddressLines(&q, is_ecml, address_field.get()) || 92 ParseCity(&q, is_ecml, address_field.get()) || 93 ParseZipCode(&q, is_ecml, address_field.get()) || 94 ParseCountry(&q, is_ecml, address_field.get())) { 95 continue; 96 } else if ((!address_field->state_ || address_field->state_->IsEmpty()) && 97 address_field->ParseState(&q, is_ecml, address_field.get())) { 98 continue; 99 } else if (ParseText(&q, ASCIIToUTF16("attention|attn.")) || 100 ParseText(&q, ASCIIToUTF16("province|region|other"))) { 101 // We ignore the following: 102 // * Attention. 103 // * Province/Region/Other. 104 continue; 105 } else if (*q != **iter && ParseEmpty(&q)) { 106 // Ignore non-labeled fields within an address; the page 107 // MapQuest Driving Directions North America.html contains such a field. 108 // We only ignore such fields after we've parsed at least one other field; 109 // otherwise we'd effectively parse address fields before other field 110 // types after any non-labeled fields, and we want email address fields to 111 // have precedence since some pages contain fields labeled 112 // "Email address". 113 continue; 114 } else { 115 // No field found. 116 break; 117 } 118 } 119 120 // If we have identified any address fields in this field then it should be 121 // added to the list of fields. 122 if (address_field->company_ != NULL || 123 address_field->address1_ != NULL || address_field->address2_ != NULL || 124 address_field->city_ != NULL || address_field->state_ != NULL || 125 address_field->zip_ != NULL || address_field->zip4_ || 126 address_field->country_ != NULL) { 127 *iter = q; 128 return address_field.release(); 129 } 130 131 return NULL; 132} 133 134AddressType AddressField::FindType() const { 135 // This is not a full address, so don't even bother trying to figure 136 // out its type. 137 if (address1_ == NULL) 138 return kGenericAddress; 139 140 // First look at the field name, which itself will sometimes contain 141 // "bill" or "ship". We could check for the ECML type prefixes 142 // here, but there's no need to since ECML's prefixes Ecom_BillTo 143 // and Ecom_ShipTo contain "bill" and "ship" anyway. 144 string16 name = StringToLowerASCII(address1_->name()); 145 return AddressTypeFromText(name); 146} 147 148AddressField::AddressField() 149 : company_(NULL), 150 address1_(NULL), 151 address2_(NULL), 152 city_(NULL), 153 state_(NULL), 154 zip_(NULL), 155 zip4_(NULL), 156 country_(NULL), 157 type_(kGenericAddress), 158 is_ecml_(false) { 159} 160 161// static 162bool AddressField::ParseCompany( 163 std::vector<AutoFillField*>::const_iterator* iter, 164 bool is_ecml, AddressField* address_field) { 165 if (address_field->company_ && !address_field->company_->IsEmpty()) 166 return false; 167 168 string16 pattern; 169 if (is_ecml) 170 pattern = GetEcmlPattern(kEcmlShipToCompanyName, 171 kEcmlBillToCompanyName, '|'); 172 else 173 pattern = ASCIIToUTF16("company|business name"); 174 175 if (!ParseText(iter, pattern, &address_field->company_)) 176 return false; 177 178 return true; 179} 180 181// static 182bool AddressField::ParseAddressLines( 183 std::vector<AutoFillField*>::const_iterator* iter, 184 bool is_ecml, AddressField* address_field) { 185 // We only match the string "address" in page text, not in element names, 186 // because sometimes every element in a group of address fields will have 187 // a name containing the string "address"; for example, on the page 188 // Kohl's - Register Billing Address.html the text element labeled "city" 189 // has the name "BILL_TO_ADDRESS<>city". We do match address labels 190 // such as "address1", which appear as element names on various pages (eg 191 // AmericanGirl-Registration.html, BloomingdalesBilling.html, 192 // EBay Registration Enter Information.html). 193 if (address_field->address1_) 194 return false; 195 196 string16 pattern; 197 if (is_ecml) { 198 pattern = GetEcmlPattern(kEcmlShipToAddress1, 199 kEcmlBillToAddress1, '|'); 200 if (!ParseText(iter, pattern, &address_field->address1_)) 201 return false; 202 } else { 203 pattern = 204 ASCIIToUTF16("street|address line|address1|street_line1|addr1"); 205 string16 label_pattern = ASCIIToUTF16("address"); 206 207 if (!ParseText(iter, pattern, &address_field->address1_)) 208 if (!ParseLabelText(iter, label_pattern, &address_field->address1_)) 209 return false; 210 } 211 212 // Some pages (e.g. expedia_checkout.html) have an apartment or 213 // suite number at this point. The occasional page (e.g. 214 // Ticketmaster3.html) calls this a unit number. We ignore this 215 // field since we can't fill it yet. 216 ParseText(iter, ASCIIToUTF16("suite|unit")); 217 218 // Optionally parse more address lines, which may have empty labels. 219 // Some pages have 3 address lines (eg SharperImageModifyAccount.html) 220 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)! 221 if (is_ecml) { 222 pattern = GetEcmlPattern(kEcmlShipToAddress2, 223 kEcmlBillToAddress2, '|'); 224 if (!ParseEmptyText(iter, &address_field->address2_)) 225 ParseText(iter, pattern, &address_field->address2_); 226 } else { 227 pattern = ASCIIToUTF16("address2|street|street_line2|addr2"); 228 string16 label_pattern = ASCIIToUTF16("address"); 229 if (!ParseEmptyText(iter, &address_field->address2_)) 230 if (!ParseText(iter, pattern, &address_field->address2_)) 231 ParseLabelText(iter, label_pattern, &address_field->address2_); 232 } 233 234 // Try for a third line, which we will promptly discard. 235 if (address_field->address2_ != NULL) { 236 if (is_ecml) { 237 pattern = GetEcmlPattern(kEcmlShipToAddress3, 238 kEcmlBillToAddress3, '|'); 239 ParseText(iter, pattern); 240 } else { 241 pattern = ASCIIToUTF16("line3"); 242 ParseLabelText(iter, pattern, NULL); 243 } 244 } 245 246 return true; 247} 248 249// static 250bool AddressField::ParseCountry( 251 std::vector<AutoFillField*>::const_iterator* iter, 252 bool is_ecml, AddressField* address_field) { 253 // Parse a country. The occasional page (e.g. 254 // Travelocity_New Member Information1.html) calls this a "location". 255 // Note: ECML standard uses 2 letter country code (ISO 3166) 256 if (address_field->country_ && !address_field->country_->IsEmpty()) 257 return false; 258 259 string16 pattern; 260 if (is_ecml) 261 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|'); 262 else 263 pattern = ASCIIToUTF16("country|location"); 264 265 if (!ParseText(iter, pattern, &address_field->country_)) 266 return false; 267 268 return true; 269} 270 271// static 272bool AddressField::ParseZipCode( 273 std::vector<AutoFillField*>::const_iterator* iter, 274 bool is_ecml, AddressField* address_field) { 275 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this 276 // is called a "post code". 277 // 278 // HACK: Just for the MapQuest driving directions page we match the 279 // exact name "1z", which MapQuest uses to label its zip code field. 280 // Hopefully before long we'll be smart enough to find the zip code 281 // on that page automatically. 282 if (address_field->zip_) 283 return false; 284 285 // We may be out of fields. 286 if (!**iter) 287 return false; 288 289 string16 pattern; 290 if (is_ecml) { 291 pattern = GetEcmlPattern(kEcmlShipToPostalCode, 292 kEcmlBillToPostalCode, '|'); 293 } else { 294 pattern = ASCIIToUTF16("zip|postal|post code|pcode|^1z$"); 295 } 296 297 AddressType tempType; 298 string16 name = (**iter)->name(); 299 300 // Note: comparisons using the ecml compliant name as a prefix must be used in 301 // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for 302 // more detail. 303 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode)); 304 if (StartsWith(name, bill_to_postal_code_field, false)) { 305 tempType = kBillingAddress; 306 } else if (StartsWith(name, bill_to_postal_code_field, false)) { 307 tempType = kShippingAddress; 308 } else { 309 tempType = kGenericAddress; 310 } 311 312 if (!ParseText(iter, pattern, &address_field->zip_)) 313 return false; 314 315 address_field->type_ = tempType; 316 if (!is_ecml) { 317 // Look for a zip+4, whose field name will also often contain 318 // the substring "zip". 319 ParseText(iter, ASCIIToUTF16("zip|^-$"), &address_field->zip4_); 320 } 321 322 return true; 323} 324 325// static 326bool AddressField::ParseCity( 327 std::vector<AutoFillField*>::const_iterator* iter, 328 bool is_ecml, AddressField* address_field) { 329 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use 330 // the term "town". 331 if (address_field->city_) 332 return false; 333 334 string16 pattern; 335 if (is_ecml) 336 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|'); 337 else 338 pattern = ASCIIToUTF16("city|town"); 339 340 if (!ParseText(iter, pattern, &address_field->city_)) 341 return false; 342 343 return true; 344} 345 346bool AddressField::ParseState( 347 std::vector<AutoFillField*>::const_iterator* iter, 348 bool is_ecml, AddressField* address_field) { 349 string16 pattern; 350 if (is_ecml) 351 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|'); 352 else 353 pattern = ASCIIToUTF16("state|county"); 354 355 if (!ParseText(iter, pattern, &address_field->state_)) 356 return false; 357 358 return true; 359} 360 361AddressType AddressField::AddressTypeFromText(const string16 &text) { 362 if (text.find(ASCIIToUTF16("same as")) != string16::npos || 363 text.find(ASCIIToUTF16("use my")) != string16::npos) 364 // This text could be a checkbox label such as "same as my billing 365 // address" or "use my shipping address". 366 // ++ It would help if we generally skipped all text that appears 367 // after a check box. 368 return kGenericAddress; 369 370 // Not all pages say "billing address" and "shipping address" explicitly; 371 // for example, Craft Catalog1.html has "Bill-to Address" and 372 // "Ship-to Address". 373 size_t bill = text.rfind(ASCIIToUTF16("bill")); 374 size_t ship = text.rfind(ASCIIToUTF16("ship")); 375 376 if (bill == string16::npos && ship == string16::npos) 377 return kGenericAddress; 378 379 if (bill != string16::npos && ship == string16::npos) 380 return kBillingAddress; 381 382 if (bill == string16::npos && ship != string16::npos) 383 return kShippingAddress; 384 385 if (bill > ship) 386 return kBillingAddress; 387 388 return kShippingAddress; 389} 390