address_field.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autofill/address_field.h"
6
7#include "base/logging.h"
8#include "base/scoped_ptr.h"
9#include "base/string16.h"
10#include "base/string_util.h"
11#include "chrome/browser/autofill/autofill_field.h"
12
13bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const {
14  AutoFillFieldType address_company;
15  AutoFillFieldType address_line1;
16  AutoFillFieldType address_line2;
17  AutoFillFieldType address_appt_num;
18  AutoFillFieldType address_city;
19  AutoFillFieldType address_state;
20  AutoFillFieldType address_zip;
21  AutoFillFieldType address_country;
22
23  switch (type_) {
24    case kShippingAddress:
25     // Fall through. AutoFill does not support shipping addresses.
26    case kGenericAddress:
27      address_company = COMPANY_NAME;
28      address_line1 = ADDRESS_HOME_LINE1;
29      address_line2 = ADDRESS_HOME_LINE2;
30      address_appt_num = ADDRESS_HOME_APT_NUM;
31      address_city = ADDRESS_HOME_CITY;
32      address_state = ADDRESS_HOME_STATE;
33      address_zip = ADDRESS_HOME_ZIP;
34      address_country = ADDRESS_HOME_COUNTRY;
35      break;
36
37    case kBillingAddress:
38      address_company = COMPANY_NAME;
39      address_line1 = ADDRESS_BILLING_LINE1;
40      address_line2 = ADDRESS_BILLING_LINE2;
41      address_appt_num = ADDRESS_BILLING_APT_NUM;
42      address_city = ADDRESS_BILLING_CITY;
43      address_state = ADDRESS_BILLING_STATE;
44      address_zip = ADDRESS_BILLING_ZIP;
45      address_country = ADDRESS_BILLING_COUNTRY;
46      break;
47
48    default:
49      NOTREACHED();
50      return false;
51  }
52
53  bool ok;
54  ok = Add(field_type_map, company_, AutoFillType(address_company));
55  DCHECK(ok);
56  ok = ok && Add(field_type_map, address1_, AutoFillType(address_line1));
57  DCHECK(ok);
58  ok = ok && Add(field_type_map, address2_, AutoFillType(address_line2));
59  DCHECK(ok);
60  ok = ok && Add(field_type_map, city_, AutoFillType(address_city));
61  DCHECK(ok);
62  ok = ok && Add(field_type_map, state_, AutoFillType(address_state));
63  DCHECK(ok);
64  ok = ok && Add(field_type_map, zip_, AutoFillType(address_zip));
65  DCHECK(ok);
66  ok = ok && Add(field_type_map, country_, AutoFillType(address_country));
67  DCHECK(ok);
68
69  return ok;
70}
71
72AddressField* AddressField::Parse(
73    std::vector<AutoFillField*>::const_iterator* iter,
74    bool is_ecml) {
75  DCHECK(iter);
76  if (!iter)
77    return NULL;
78
79  scoped_ptr<AddressField> address_field(new AddressField);
80  std::vector<AutoFillField*>::const_iterator q = *iter;
81  string16 pattern;
82
83  // The ECML standard uses 2 letter country codes.  So we will
84  // have to remember that this is an ECML form, for when we fill
85  // it out.
86  address_field->is_ecml_ = is_ecml;
87
88  // Allow address fields to appear in any order.
89  while (true) {
90    if (ParseCompany(&q, is_ecml, address_field.get()) ||
91        ParseAddressLines(&q, is_ecml, address_field.get()) ||
92        ParseCity(&q, is_ecml, address_field.get()) ||
93        ParseZipCode(&q, is_ecml, address_field.get()) ||
94        ParseCountry(&q, is_ecml, address_field.get())) {
95      continue;
96    } else if ((!address_field->state_ || address_field->state_->IsEmpty()) &&
97               address_field->ParseState(&q, is_ecml, address_field.get())) {
98      continue;
99    } else if (ParseText(&q, ASCIIToUTF16("attention|attn.")) ||
100               ParseText(&q, ASCIIToUTF16("province|region|other"))) {
101      // We ignore the following:
102      // * Attention.
103      // * Province/Region/Other.
104      continue;
105    } else if (*q != **iter && ParseEmpty(&q)) {
106      // Ignore non-labeled fields within an address; the page
107      // MapQuest Driving Directions North America.html contains such a field.
108      // We only ignore such fields after we've parsed at least one other field;
109      // otherwise we'd effectively parse address fields before other field
110      // types after any non-labeled fields, and we want email address fields to
111      // have precedence since some pages contain fields labeled
112      // "Email address".
113      continue;
114    } else {
115      // No field found.
116      break;
117    }
118  }
119
120  // If we have identified any address fields in this field then it should be
121  // added to the list of fields.
122  if (address_field->company_ != NULL ||
123      address_field->address1_ != NULL || address_field->address2_ != NULL ||
124      address_field->city_ != NULL || address_field->state_ != NULL ||
125      address_field->zip_ != NULL || address_field->zip4_ ||
126      address_field->country_ != NULL) {
127    *iter = q;
128    return address_field.release();
129  }
130
131  return NULL;
132}
133
134AddressType AddressField::FindType() const {
135  // This is not a full address, so don't even bother trying to figure
136  // out its type.
137  if (address1_ == NULL)
138    return kGenericAddress;
139
140  // First look at the field name, which itself will sometimes contain
141  // "bill" or "ship".  We could check for the ECML type prefixes
142  // here, but there's no need to since ECML's prefixes Ecom_BillTo
143  // and Ecom_ShipTo contain "bill" and "ship" anyway.
144  string16 name = StringToLowerASCII(address1_->name());
145  return AddressTypeFromText(name);
146}
147
148AddressField::AddressField()
149    : company_(NULL),
150      address1_(NULL),
151      address2_(NULL),
152      city_(NULL),
153      state_(NULL),
154      zip_(NULL),
155      zip4_(NULL),
156      country_(NULL),
157      type_(kGenericAddress),
158      is_ecml_(false) {
159}
160
161// static
162bool AddressField::ParseCompany(
163    std::vector<AutoFillField*>::const_iterator* iter,
164    bool is_ecml, AddressField* address_field) {
165  if (address_field->company_ && !address_field->company_->IsEmpty())
166    return false;
167
168  string16 pattern;
169  if (is_ecml)
170    pattern = GetEcmlPattern(kEcmlShipToCompanyName,
171                             kEcmlBillToCompanyName, '|');
172  else
173    pattern = ASCIIToUTF16("company|business name");
174
175  if (!ParseText(iter, pattern, &address_field->company_))
176    return false;
177
178  return true;
179}
180
181// static
182bool AddressField::ParseAddressLines(
183    std::vector<AutoFillField*>::const_iterator* iter,
184    bool is_ecml, AddressField* address_field) {
185  // We only match the string "address" in page text, not in element names,
186  // because sometimes every element in a group of address fields will have
187  // a name containing the string "address"; for example, on the page
188  // Kohl's - Register Billing Address.html the text element labeled "city"
189  // has the name "BILL_TO_ADDRESS<>city".  We do match address labels
190  // such as "address1", which appear as element names on various pages (eg
191  // AmericanGirl-Registration.html, BloomingdalesBilling.html,
192  // EBay Registration Enter Information.html).
193  if (address_field->address1_)
194    return false;
195
196  string16 pattern;
197  if (is_ecml) {
198    pattern = GetEcmlPattern(kEcmlShipToAddress1,
199                             kEcmlBillToAddress1, '|');
200    if (!ParseText(iter, pattern, &address_field->address1_))
201      return false;
202  } else {
203    pattern =
204        ASCIIToUTF16("street|address line|address1|street_line1|addr1");
205    string16 label_pattern = ASCIIToUTF16("address");
206
207    if (!ParseText(iter, pattern, &address_field->address1_))
208      if (!ParseLabelText(iter, label_pattern, &address_field->address1_))
209        return false;
210  }
211
212  // Some pages (e.g. expedia_checkout.html) have an apartment or
213  // suite number at this point.  The occasional page (e.g.
214  // Ticketmaster3.html) calls this a unit number.  We ignore this
215  // field since we can't fill it yet.
216  ParseText(iter, ASCIIToUTF16("suite|unit"));
217
218  // Optionally parse more address lines, which may have empty labels.
219  // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
220  // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
221  if (is_ecml) {
222    pattern = GetEcmlPattern(kEcmlShipToAddress2,
223                             kEcmlBillToAddress2, '|');
224    if (!ParseEmptyText(iter, &address_field->address2_))
225      ParseText(iter, pattern, &address_field->address2_);
226  } else {
227    pattern = ASCIIToUTF16("address2|street|street_line2|addr2");
228    string16 label_pattern = ASCIIToUTF16("address");
229    if (!ParseEmptyText(iter, &address_field->address2_))
230      if (!ParseText(iter, pattern, &address_field->address2_))
231        ParseLabelText(iter, label_pattern, &address_field->address2_);
232  }
233
234  // Try for a third line, which we will promptly discard.
235  if (address_field->address2_ != NULL) {
236    if (is_ecml) {
237      pattern = GetEcmlPattern(kEcmlShipToAddress3,
238                               kEcmlBillToAddress3, '|');
239      ParseText(iter, pattern);
240    } else {
241      pattern = ASCIIToUTF16("line3");
242      ParseLabelText(iter, pattern, NULL);
243    }
244  }
245
246  return true;
247}
248
249// static
250bool AddressField::ParseCountry(
251    std::vector<AutoFillField*>::const_iterator* iter,
252    bool is_ecml, AddressField* address_field) {
253  // Parse a country.  The occasional page (e.g.
254  // Travelocity_New Member Information1.html) calls this a "location".
255  // Note: ECML standard uses 2 letter country code (ISO 3166)
256  if (address_field->country_ && !address_field->country_->IsEmpty())
257    return false;
258
259  string16 pattern;
260  if (is_ecml)
261    pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|');
262  else
263    pattern = ASCIIToUTF16("country|location");
264
265  if (!ParseText(iter, pattern, &address_field->country_))
266    return false;
267
268  return true;
269}
270
271// static
272bool AddressField::ParseZipCode(
273    std::vector<AutoFillField*>::const_iterator* iter,
274    bool is_ecml, AddressField* address_field) {
275  // Parse a zip code.  On some UK pages (e.g. The China Shop2.html) this
276  // is called a "post code".
277  //
278  // HACK: Just for the MapQuest driving directions page we match the
279  // exact name "1z", which MapQuest uses to label its zip code field.
280  // Hopefully before long we'll be smart enough to find the zip code
281  // on that page automatically.
282  if (address_field->zip_)
283    return false;
284
285  // We may be out of fields.
286  if (!**iter)
287    return false;
288
289  string16 pattern;
290  if (is_ecml) {
291    pattern = GetEcmlPattern(kEcmlShipToPostalCode,
292                             kEcmlBillToPostalCode, '|');
293  } else {
294    pattern = ASCIIToUTF16("zip|postal|post code|pcode|^1z$");
295  }
296
297  AddressType tempType;
298  string16 name = (**iter)->name();
299
300  // Note: comparisons using the ecml compliant name as a prefix must be used in
301  // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for
302  // more detail.
303  string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode));
304  if (StartsWith(name, bill_to_postal_code_field, false)) {
305    tempType = kBillingAddress;
306  } else if (StartsWith(name, bill_to_postal_code_field, false)) {
307    tempType = kShippingAddress;
308  } else {
309    tempType = kGenericAddress;
310  }
311
312  if (!ParseText(iter, pattern, &address_field->zip_))
313    return false;
314
315  address_field->type_ = tempType;
316  if (!is_ecml) {
317    // Look for a zip+4, whose field name will also often contain
318    // the substring "zip".
319    ParseText(iter, ASCIIToUTF16("zip|^-$"), &address_field->zip4_);
320  }
321
322  return true;
323}
324
325// static
326bool AddressField::ParseCity(
327    std::vector<AutoFillField*>::const_iterator* iter,
328    bool is_ecml, AddressField* address_field) {
329  // Parse a city name.  Some UK pages (e.g. The China Shop2.html) use
330  // the term "town".
331  if (address_field->city_)
332    return false;
333
334  string16 pattern;
335  if (is_ecml)
336    pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|');
337  else
338    pattern = ASCIIToUTF16("city|town");
339
340  if (!ParseText(iter, pattern, &address_field->city_))
341    return false;
342
343  return true;
344}
345
346bool AddressField::ParseState(
347    std::vector<AutoFillField*>::const_iterator* iter,
348    bool is_ecml, AddressField* address_field) {
349  string16 pattern;
350  if (is_ecml)
351    pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|');
352  else
353    pattern = ASCIIToUTF16("state|county");
354
355  if (!ParseText(iter, pattern, &address_field->state_))
356    return false;
357
358  return true;
359}
360
361AddressType AddressField::AddressTypeFromText(const string16 &text) {
362  if (text.find(ASCIIToUTF16("same as")) != string16::npos ||
363      text.find(ASCIIToUTF16("use my")) != string16::npos)
364    // This text could be a checkbox label such as "same as my billing
365    // address" or "use my shipping address".
366    // ++ It would help if we generally skipped all text that appears
367    // after a check box.
368    return kGenericAddress;
369
370  // Not all pages say "billing address" and "shipping address" explicitly;
371  // for example, Craft Catalog1.html has "Bill-to Address" and
372  // "Ship-to Address".
373  size_t bill = text.rfind(ASCIIToUTF16("bill"));
374  size_t ship = text.rfind(ASCIIToUTF16("ship"));
375
376  if (bill == string16::npos && ship == string16::npos)
377    return kGenericAddress;
378
379  if (bill != string16::npos && ship == string16::npos)
380    return kBillingAddress;
381
382  if (bill == string16::npos && ship != string16::npos)
383    return kShippingAddress;
384
385  if (bill > ship)
386    return kBillingAddress;
387
388  return kShippingAddress;
389}
390