form_structure.cc revision 116680a4aac90f2aa7413d9095a592090648e557
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/core/browser/form_structure.h"
6
7#include <utility>
8
9#include "base/basictypes.h"
10#include "base/command_line.h"
11#include "base/i18n/case_conversion.h"
12#include "base/logging.h"
13#include "base/memory/scoped_ptr.h"
14#include "base/sha1.h"
15#include "base/strings/string_number_conversions.h"
16#include "base/strings/string_util.h"
17#include "base/strings/stringprintf.h"
18#include "base/strings/utf_string_conversions.h"
19#include "base/time/time.h"
20#include "components/autofill/core/browser/autofill_metrics.h"
21#include "components/autofill/core/browser/autofill_type.h"
22#include "components/autofill/core/browser/autofill_xml_parser.h"
23#include "components/autofill/core/browser/field_types.h"
24#include "components/autofill/core/browser/form_field.h"
25#include "components/autofill/core/common/autofill_constants.h"
26#include "components/autofill/core/common/form_data.h"
27#include "components/autofill/core/common/form_data_predictions.h"
28#include "components/autofill/core/common/form_field_data.h"
29#include "components/autofill/core/common/form_field_data_predictions.h"
30#include "third_party/icu/source/i18n/unicode/regex.h"
31#include "third_party/libjingle/source/talk/xmllite/xmlelement.h"
32
33namespace autofill {
34namespace {
35
// XML elements and attributes.
// These names are used as element and attribute names (and, for
// |kClientVersion|, as an attribute value) when the request XML documents are
// built further down in this file.
const char kAttributeAutofillUsed[] = "autofillused";
const char kAttributeAutofillType[] = "autofilltype";
const char kAttributeClientVersion[] = "clientversion";
const char kAttributeDataPresent[] = "datapresent";
const char kAttributeFieldID[] = "fieldid";
const char kAttributeFieldType[] = "fieldtype";
const char kAttributeFormSignature[] = "formsignature";
const char kAttributeName[] = "name";
const char kAttributeSignature[] = "signature";
// Value sent in the "clientversion" attribute of upload and query requests.
const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";
// Prepended to every serialized request document.
const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
const char kXMLElementAutofillQuery[] = "autofillquery";
const char kXMLElementAutofillUpload[] = "autofillupload";
const char kXMLElementFieldAssignments[] = "fieldassignments";
const char kXMLElementField[] = "field";
const char kXMLElementFields[] = "fields";
const char kXMLElementForm[] = "form";
const char kBillingMode[] = "billing";
const char kShippingMode[] = "shipping";

// Strip away >= 5 consecutive digits.
// Regular expression applied to field names by |StripDigitsIfRequired()|
// before they are appended to the form signature string.
const char kIgnorePatternInFieldName[] = "\\d{5,}+";
59
60// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
61// |available_field_types| and returns the hex representation as a string.
62std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) {
63  // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
64  // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
65  const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
66
67  // Pack the types in |available_field_types| into |bit_field|.
68  std::vector<uint8> bit_field(kNumBytes, 0);
69  for (ServerFieldTypeSet::const_iterator field_type =
70           available_field_types.begin();
71       field_type != available_field_types.end();
72       ++field_type) {
73    // Set the appropriate bit in the field.  The bit we set is the one
74    // |field_type| % 8 from the left of the byte.
75    const size_t byte = *field_type / 8;
76    const size_t bit = 0x80 >> (*field_type % 8);
77    DCHECK(byte < bit_field.size());
78    bit_field[byte] |= bit;
79  }
80
81  // Discard any trailing zeroes.
82  // If there are no available types, we return the empty string.
83  size_t data_end = bit_field.size();
84  for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
85  }
86
87  // Print all meaningfull bytes into a string.
88  std::string data_presence;
89  data_presence.reserve(data_end * 2 + 1);
90  for (size_t i = 0; i < data_end; ++i) {
91    base::StringAppendF(&data_presence, "%02x", bit_field[i]);
92  }
93
94  return data_presence;
95}
96
97// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
98// in upload xml, and also add them to the parent XmlElement.
99void EncodeFieldForUpload(const AutofillField& field,
100                          buzz::XmlElement* parent) {
101  // Don't upload checkable fields.
102  if (field.is_checkable)
103    return;
104
105  ServerFieldTypeSet types = field.possible_types();
106  // |types| could be empty in unit-tests only.
107  for (ServerFieldTypeSet::iterator field_type = types.begin();
108       field_type != types.end(); ++field_type) {
109    buzz::XmlElement *field_element = new buzz::XmlElement(
110        buzz::QName(kXMLElementField));
111
112    field_element->SetAttr(buzz::QName(kAttributeSignature),
113                           field.FieldSignature());
114    field_element->SetAttr(buzz::QName(kAttributeAutofillType),
115                           base::IntToString(*field_type));
116    parent->AddElement(field_element);
117  }
118}
119
120// Helper for |EncodeFormRequest()| that creates XmlElement for the given field
121// in query xml, and also add it to the parent XmlElement.
122void EncodeFieldForQuery(const AutofillField& field,
123                         buzz::XmlElement* parent) {
124  buzz::XmlElement *field_element = new buzz::XmlElement(
125      buzz::QName(kXMLElementField));
126  field_element->SetAttr(buzz::QName(kAttributeSignature),
127                         field.FieldSignature());
128  parent->AddElement(field_element);
129}
130
131// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
132// in field assignments xml, and also add them to the parent XmlElement.
133void EncodeFieldForFieldAssignments(const AutofillField& field,
134                                    buzz::XmlElement* parent) {
135  ServerFieldTypeSet types = field.possible_types();
136  for (ServerFieldTypeSet::iterator field_type = types.begin();
137       field_type != types.end(); ++field_type) {
138    buzz::XmlElement *field_element = new buzz::XmlElement(
139        buzz::QName(kXMLElementFields));
140
141    field_element->SetAttr(buzz::QName(kAttributeFieldID),
142                           field.FieldSignature());
143    field_element->SetAttr(buzz::QName(kAttributeFieldType),
144                           base::IntToString(*field_type));
145    field_element->SetAttr(buzz::QName(kAttributeName),
146                           base::UTF16ToUTF8(field.name));
147    parent->AddElement(field_element);
148  }
149}
150
// Returns |true| iff the |token| is a type hint for a contact field, as
// specified in the implementation section of http://is.gd/whatwg_autocomplete
// Only "home", "work" and "mobile" are recognized; the comparison is exact
// and case-sensitive.  Note that "fax" and "pager" are intentionally ignored,
// as Chrome does not support filling either type of information.
bool IsContactTypeHint(const std::string& token) {
  static const char* const kContactTypeHints[] = { "home", "work", "mobile" };
  const size_t kNumHints =
      sizeof(kContactTypeHints) / sizeof(kContactTypeHints[0]);
  for (size_t i = 0; i < kNumHints; ++i) {
    if (token == kContactTypeHints[i])
      return true;
  }
  return false;
}
158
159// Returns |true| iff the |token| is a type hint appropriate for a field of the
160// given |field_type|, as specified in the implementation section of
161// http://is.gd/whatwg_autocomplete
162bool ContactTypeHintMatchesFieldType(const std::string& token,
163                                     HtmlFieldType field_type) {
164  // The "home" and "work" type hints are only appropriate for email and phone
165  // number field types.
166  if (token == "home" || token == "work") {
167    return field_type == HTML_TYPE_EMAIL ||
168        (field_type >= HTML_TYPE_TEL &&
169         field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX);
170  }
171
172  // The "mobile" type hint is only appropriate for phone number field types.
173  // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
174  // support filling either type of information.
175  if (token == "mobile") {
176    return field_type >= HTML_TYPE_TEL &&
177        field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX;
178  }
179
180  return false;
181}
182
183// Returns the Chrome Autofill-supported field type corresponding to the given
184// |autocomplete_attribute_value|, if there is one, in the context of the given
185// |field|.  Chrome Autofill supports a subset of the field types listed at
186// http://is.gd/whatwg_autocomplete
187HtmlFieldType FieldTypeFromAutocompleteAttributeValue(
188    const std::string& autocomplete_attribute_value,
189    const AutofillField& field) {
190  if (autocomplete_attribute_value == "name")
191    return HTML_TYPE_NAME;
192
193  if (autocomplete_attribute_value == "given-name")
194    return HTML_TYPE_GIVEN_NAME;
195
196  if (autocomplete_attribute_value == "additional-name") {
197    if (field.max_length == 1)
198      return HTML_TYPE_ADDITIONAL_NAME_INITIAL;
199    else
200      return HTML_TYPE_ADDITIONAL_NAME;
201  }
202
203  if (autocomplete_attribute_value == "family-name")
204    return HTML_TYPE_FAMILY_NAME;
205
206  if (autocomplete_attribute_value == "organization")
207    return HTML_TYPE_ORGANIZATION;
208
209  if (autocomplete_attribute_value == "street-address")
210    return HTML_TYPE_STREET_ADDRESS;
211
212  if (autocomplete_attribute_value == "address-line1")
213    return HTML_TYPE_ADDRESS_LINE1;
214
215  if (autocomplete_attribute_value == "address-line2")
216    return HTML_TYPE_ADDRESS_LINE2;
217
218  if (autocomplete_attribute_value == "address-line3")
219    return HTML_TYPE_ADDRESS_LINE3;
220
221  // TODO(estade): remove support for "locality" and "region".
222  if (autocomplete_attribute_value == "locality")
223    return HTML_TYPE_ADDRESS_LEVEL2;
224
225  if (autocomplete_attribute_value == "region")
226    return HTML_TYPE_ADDRESS_LEVEL1;
227
228  if (autocomplete_attribute_value == "address-level1")
229    return HTML_TYPE_ADDRESS_LEVEL1;
230
231  if (autocomplete_attribute_value == "address-level2")
232    return HTML_TYPE_ADDRESS_LEVEL2;
233
234  if (autocomplete_attribute_value == "address-level3")
235    return HTML_TYPE_ADDRESS_LEVEL3;
236
237  if (autocomplete_attribute_value == "country")
238    return HTML_TYPE_COUNTRY_CODE;
239
240  if (autocomplete_attribute_value == "country-name")
241    return HTML_TYPE_COUNTRY_NAME;
242
243  if (autocomplete_attribute_value == "postal-code")
244    return HTML_TYPE_POSTAL_CODE;
245
246  // content_switches.h isn't accessible from here, hence we have
247  // to copy the string literal. This should be removed soon anyway.
248  if (autocomplete_attribute_value == "address" &&
249      CommandLine::ForCurrentProcess()->HasSwitch(
250          "enable-experimental-web-platform-features")) {
251    return HTML_TYPE_FULL_ADDRESS;
252  }
253
254  if (autocomplete_attribute_value == "cc-name")
255    return HTML_TYPE_CREDIT_CARD_NAME;
256
257  if (autocomplete_attribute_value == "cc-number")
258    return HTML_TYPE_CREDIT_CARD_NUMBER;
259
260  if (autocomplete_attribute_value == "cc-exp") {
261    if (field.max_length == 5)
262      return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
263    else if (field.max_length == 7)
264      return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
265    else
266      return HTML_TYPE_CREDIT_CARD_EXP;
267  }
268
269  if (autocomplete_attribute_value == "cc-exp-month")
270    return HTML_TYPE_CREDIT_CARD_EXP_MONTH;
271
272  if (autocomplete_attribute_value == "cc-exp-year") {
273    if (field.max_length == 2)
274      return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR;
275    else if (field.max_length == 4)
276      return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR;
277    else
278      return HTML_TYPE_CREDIT_CARD_EXP_YEAR;
279  }
280
281  if (autocomplete_attribute_value == "cc-csc")
282    return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE;
283
284  if (autocomplete_attribute_value == "cc-type")
285    return HTML_TYPE_CREDIT_CARD_TYPE;
286
287  if (autocomplete_attribute_value == "transaction-amount")
288    return HTML_TYPE_TRANSACTION_AMOUNT;
289
290  if (autocomplete_attribute_value == "transaction-currency")
291    return HTML_TYPE_TRANSACTION_CURRENCY;
292
293  if (autocomplete_attribute_value == "tel")
294    return HTML_TYPE_TEL;
295
296  if (autocomplete_attribute_value == "tel-country-code")
297    return HTML_TYPE_TEL_COUNTRY_CODE;
298
299  if (autocomplete_attribute_value == "tel-national")
300    return HTML_TYPE_TEL_NATIONAL;
301
302  if (autocomplete_attribute_value == "tel-area-code")
303    return HTML_TYPE_TEL_AREA_CODE;
304
305  if (autocomplete_attribute_value == "tel-local")
306    return HTML_TYPE_TEL_LOCAL;
307
308  if (autocomplete_attribute_value == "tel-local-prefix")
309    return HTML_TYPE_TEL_LOCAL_PREFIX;
310
311  if (autocomplete_attribute_value == "tel-local-suffix")
312    return HTML_TYPE_TEL_LOCAL_SUFFIX;
313
314  if (autocomplete_attribute_value == "email")
315    return HTML_TYPE_EMAIL;
316
317  return HTML_TYPE_UNKNOWN;
318}
319
320std::string StripDigitsIfRequired(const base::string16& input) {
321  UErrorCode status = U_ZERO_ERROR;
322  CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern,
323                         (kIgnorePatternInFieldName));
324  CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher,
325                         (icu_pattern, UREGEX_CASE_INSENSITIVE, status));
326  DCHECK_EQ(status, U_ZERO_ERROR);
327
328  icu::UnicodeString icu_input(input.data(), input.length());
329  matcher.reset(icu_input);
330
331  icu::UnicodeString replaced_string = matcher.replaceAll("", status);
332
333  std::string return_string;
334  status = U_ZERO_ERROR;
335  base::UTF16ToUTF8(replaced_string.getBuffer(),
336                    static_cast<size_t>(replaced_string.length()),
337                    &return_string);
338  if (status != U_ZERO_ERROR) {
339    DVLOG(1) << "Couldn't strip digits in " << base::UTF16ToUTF8(input);
340    return base::UTF16ToUTF8(input);
341  }
342
343  return return_string;
344}
345
346}  // namespace
347
348FormStructure::FormStructure(const FormData& form)
349    : form_name_(form.name),
350      source_url_(form.origin),
351      target_url_(form.action),
352      autofill_count_(0),
353      active_field_count_(0),
354      upload_required_(USE_UPLOAD_RATES),
355      has_author_specified_types_(false) {
356  // Copy the form fields.
357  std::map<base::string16, size_t> unique_names;
358  for (std::vector<FormFieldData>::const_iterator field =
359           form.fields.begin();
360       field != form.fields.end(); ++field) {
361    if (!ShouldSkipField(*field)) {
362      // Add all supported form fields (including with empty names) to the
363      // signature.  This is a requirement for Autofill servers.
364      form_signature_field_names_.append("&");
365      form_signature_field_names_.append(StripDigitsIfRequired(field->name));
366
367      ++active_field_count_;
368    }
369
370    // Generate a unique name for this field by appending a counter to the name.
371    // Make sure to prepend the counter with a non-numeric digit so that we are
372    // guaranteed to avoid collisions.
373    if (!unique_names.count(field->name))
374      unique_names[field->name] = 1;
375    else
376      ++unique_names[field->name];
377    base::string16 unique_name = field->name + base::ASCIIToUTF16("_") +
378        base::IntToString16(unique_names[field->name]);
379    fields_.push_back(new AutofillField(*field, unique_name));
380  }
381}
382
// The destructor body performs no explicit cleanup.
FormStructure::~FormStructure() {}
384
385void FormStructure::DetermineHeuristicTypes(
386    const AutofillMetrics& metric_logger) {
387  // First, try to detect field types based on each field's |autocomplete|
388  // attribute value.  If there is at least one form field that specifies an
389  // autocomplete type hint, don't try to apply other heuristics to match fields
390  // in this form.
391  bool has_author_specified_sections;
392  ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_,
393                                            &has_author_specified_sections);
394
395  if (!has_author_specified_types_) {
396    ServerFieldTypeMap field_type_map;
397    FormField::ParseFormFields(fields_.get(), &field_type_map);
398    for (size_t i = 0; i < field_count(); ++i) {
399      AutofillField* field = fields_[i];
400      ServerFieldTypeMap::iterator iter =
401          field_type_map.find(field->unique_name());
402      if (iter != field_type_map.end())
403        field->set_heuristic_type(iter->second);
404    }
405  }
406
407  UpdateAutofillCount();
408  IdentifySections(has_author_specified_sections);
409
410  if (IsAutofillable()) {
411    metric_logger.LogDeveloperEngagementMetric(
412        AutofillMetrics::FILLABLE_FORM_PARSED);
413    if (has_author_specified_types_) {
414      metric_logger.LogDeveloperEngagementMetric(
415          AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS);
416    }
417  }
418}
419
420bool FormStructure::EncodeUploadRequest(
421    const ServerFieldTypeSet& available_field_types,
422    bool form_was_autofilled,
423    std::string* encoded_xml) const {
424  DCHECK(ShouldBeCrowdsourced());
425
426  // Verify that |available_field_types| agrees with the possible field types we
427  // are uploading.
428  for (std::vector<AutofillField*>::const_iterator field = begin();
429       field != end();
430       ++field) {
431    for (ServerFieldTypeSet::const_iterator type =
432             (*field)->possible_types().begin();
433         type != (*field)->possible_types().end();
434         ++type) {
435      DCHECK(*type == UNKNOWN_TYPE ||
436             *type == EMPTY_TYPE ||
437             available_field_types.count(*type));
438    }
439  }
440
441  // Set up the <autofillupload> element and its attributes.
442  buzz::XmlElement autofill_request_xml(
443      (buzz::QName(kXMLElementAutofillUpload)));
444  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
445                               kClientVersion);
446  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
447                               FormSignature());
448  autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
449                               form_was_autofilled ? "true" : "false");
450  autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
451                               EncodeFieldTypes(available_field_types).c_str());
452
453  if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
454    return false;  // Malformed form, skip it.
455
456  // Obtain the XML structure as a string.
457  *encoded_xml = kXMLDeclaration;
458  *encoded_xml += autofill_request_xml.Str().c_str();
459
460  // To enable this logging, run with the flag --vmodule="form_structure=2".
461  VLOG(2) << "\n" << *encoded_xml;
462
463  return true;
464}
465
466bool FormStructure::EncodeFieldAssignments(
467    const ServerFieldTypeSet& available_field_types,
468    std::string* encoded_xml) const {
469  DCHECK(ShouldBeCrowdsourced());
470
471  // Set up the <fieldassignments> element and its attributes.
472  buzz::XmlElement autofill_request_xml(
473      (buzz::QName(kXMLElementFieldAssignments)));
474  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
475                               FormSignature());
476
477  if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS,
478                         &autofill_request_xml))
479    return false;  // Malformed form, skip it.
480
481  // Obtain the XML structure as a string.
482  *encoded_xml = kXMLDeclaration;
483  *encoded_xml += autofill_request_xml.Str().c_str();
484
485  return true;
486}
487
488// static
489bool FormStructure::EncodeQueryRequest(
490    const std::vector<FormStructure*>& forms,
491    std::vector<std::string>* encoded_signatures,
492    std::string* encoded_xml) {
493  DCHECK(encoded_signatures);
494  DCHECK(encoded_xml);
495  encoded_xml->clear();
496  encoded_signatures->clear();
497  encoded_signatures->reserve(forms.size());
498
499  // Set up the <autofillquery> element and attributes.
500  buzz::XmlElement autofill_request_xml(
501      (buzz::QName(kXMLElementAutofillQuery)));
502  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
503                               kClientVersion);
504
505  // Some badly formatted web sites repeat forms - detect that and encode only
506  // one form as returned data would be the same for all the repeated forms.
507  std::set<std::string> processed_forms;
508  for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
509       it != forms.end();
510       ++it) {
511    std::string signature((*it)->FormSignature());
512    if (processed_forms.find(signature) != processed_forms.end())
513      continue;
514    processed_forms.insert(signature);
515    scoped_ptr<buzz::XmlElement> encompassing_xml_element(
516        new buzz::XmlElement(buzz::QName(kXMLElementForm)));
517    encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
518                                      signature);
519
520    if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
521                                  encompassing_xml_element.get()))
522      continue;  // Malformed form, skip it.
523
524    autofill_request_xml.AddElement(encompassing_xml_element.release());
525    encoded_signatures->push_back(signature);
526  }
527
528  if (!encoded_signatures->size())
529    return false;
530
531  // Note: Chrome used to also set 'accepts="e"' (where 'e' is for experiments),
532  // but no longer sets this because support for experiments is deprecated.  If
533  // it ever resurfaces, re-add code here to set the attribute accordingly.
534
535  // Obtain the XML structure as a string.
536  *encoded_xml = kXMLDeclaration;
537  *encoded_xml += autofill_request_xml.Str().c_str();
538
539  return true;
540}
541
542// static
543void FormStructure::ParseQueryResponse(
544    const std::string& response_xml,
545    const std::vector<FormStructure*>& forms,
546    const AutofillMetrics& metric_logger) {
547  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED);
548
549  // Parse the field types from the server response to the query.
550  std::vector<AutofillServerFieldInfo> field_infos;
551  UploadRequired upload_required;
552  AutofillQueryXmlParser parse_handler(&field_infos,
553                                       &upload_required);
554  buzz::XmlParser parser(&parse_handler);
555  parser.Parse(response_xml.c_str(), response_xml.length(), true);
556  if (!parse_handler.succeeded())
557    return;
558
559  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
560
561  bool heuristics_detected_fillable_field = false;
562  bool query_response_overrode_heuristics = false;
563
564  // Copy the field types into the actual form.
565  std::vector<AutofillServerFieldInfo>::iterator current_info =
566      field_infos.begin();
567  for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
568       iter != forms.end(); ++iter) {
569    FormStructure* form = *iter;
570    form->upload_required_ = upload_required;
571
572    for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
573         field != form->fields_.end(); ++field) {
574      if (form->ShouldSkipField(**field))
575        continue;
576
577      // In some cases *successful* response does not return all the fields.
578      // Quit the update of the types then.
579      if (current_info == field_infos.end())
580        break;
581
582      // UNKNOWN_TYPE is reserved for use by the client.
583      DCHECK_NE(current_info->field_type, UNKNOWN_TYPE);
584
585      ServerFieldType heuristic_type = (*field)->heuristic_type();
586      if (heuristic_type != UNKNOWN_TYPE)
587        heuristics_detected_fillable_field = true;
588
589      (*field)->set_server_type(current_info->field_type);
590      if (heuristic_type != (*field)->Type().GetStorableType())
591        query_response_overrode_heuristics = true;
592
593      // Copy default value into the field if available.
594      if (!current_info->default_value.empty())
595        (*field)->set_default_value(current_info->default_value);
596
597      ++current_info;
598    }
599
600    form->UpdateAutofillCount();
601    form->IdentifySections(false);
602  }
603
604  AutofillMetrics::ServerQueryMetric metric;
605  if (query_response_overrode_heuristics) {
606    if (heuristics_detected_fillable_field) {
607      metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
608    } else {
609      metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
610    }
611  } else {
612    metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
613  }
614  metric_logger.LogServerQueryMetric(metric);
615}
616
617// static
618void FormStructure::GetFieldTypePredictions(
619    const std::vector<FormStructure*>& form_structures,
620    std::vector<FormDataPredictions>* forms) {
621  forms->clear();
622  forms->reserve(form_structures.size());
623  for (size_t i = 0; i < form_structures.size(); ++i) {
624    FormStructure* form_structure = form_structures[i];
625    FormDataPredictions form;
626    form.data.name = form_structure->form_name_;
627    form.data.origin = form_structure->source_url_;
628    form.data.action = form_structure->target_url_;
629    form.signature = form_structure->FormSignature();
630
631    for (std::vector<AutofillField*>::const_iterator field =
632             form_structure->fields_.begin();
633         field != form_structure->fields_.end(); ++field) {
634      form.data.fields.push_back(FormFieldData(**field));
635
636      FormFieldDataPredictions annotated_field;
637      annotated_field.signature = (*field)->FieldSignature();
638      annotated_field.heuristic_type =
639          AutofillType((*field)->heuristic_type()).ToString();
640      annotated_field.server_type =
641          AutofillType((*field)->server_type()).ToString();
642      annotated_field.overall_type = (*field)->Type().ToString();
643      form.fields.push_back(annotated_field);
644    }
645
646    forms->push_back(form);
647  }
648}
649
650std::string FormStructure::FormSignature() const {
651  std::string scheme(target_url_.scheme());
652  std::string host(target_url_.host());
653
654  // If target host or scheme is empty, set scheme and host of source url.
655  // This is done to match the Toolbar's behavior.
656  if (scheme.empty() || host.empty()) {
657    scheme = source_url_.scheme();
658    host = source_url_.host();
659  }
660
661  std::string form_string = scheme + "://" + host + "&" +
662                            base::UTF16ToUTF8(form_name_) +
663                            form_signature_field_names_;
664
665  return Hash64Bit(form_string);
666}
667
// Returns true when |field| is checkable.  The constructor leaves such fields
// out of the form signature and the active field count, and
// |ParseQueryResponse()| does not assign server types to them.
bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
  return field.is_checkable;
}
671
672bool FormStructure::IsAutofillable() const {
673  if (autofill_count() < kRequiredAutofillFields)
674    return false;
675
676  return ShouldBeParsed();
677}
678
679void FormStructure::UpdateAutofillCount() {
680  autofill_count_ = 0;
681  for (std::vector<AutofillField*>::const_iterator iter = begin();
682       iter != end(); ++iter) {
683    AutofillField* field = *iter;
684    if (field && field->IsFieldFillable())
685      ++autofill_count_;
686  }
687}
688
689bool FormStructure::ShouldBeParsed() const {
690  if (active_field_count() < kRequiredAutofillFields)
691    return false;
692
693  // Rule out http(s)://*/search?...
694  //  e.g. http://www.google.com/search?q=...
695  //       http://search.yahoo.com/search?p=...
696  if (target_url_.path() == "/search")
697    return false;
698
699  bool has_text_field = false;
700  for (std::vector<AutofillField*>::const_iterator it = begin();
701       it != end() && !has_text_field; ++it) {
702    has_text_field |= (*it)->form_control_type != "select-one";
703  }
704
705  return has_text_field;
706}
707
708bool FormStructure::ShouldBeCrowdsourced() const {
709  return !has_author_specified_types_ && ShouldBeParsed();
710}
711
712void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
713  // Map from field signatures to cached fields.
714  std::map<std::string, const AutofillField*> cached_fields;
715  for (size_t i = 0; i < cached_form.field_count(); ++i) {
716    const AutofillField* field = cached_form.field(i);
717    cached_fields[field->FieldSignature()] = field;
718  }
719
720  for (std::vector<AutofillField*>::const_iterator iter = begin();
721       iter != end(); ++iter) {
722    AutofillField* field = *iter;
723
724    std::map<std::string, const AutofillField*>::const_iterator
725        cached_field = cached_fields.find(field->FieldSignature());
726    if (cached_field != cached_fields.end()) {
727      if (field->form_control_type != "select-one" &&
728          field->value == cached_field->second->value) {
729        // From the perspective of learning user data, text fields containing
730        // default values are equivalent to empty fields.
731        field->value = base::string16();
732      }
733
734      field->set_heuristic_type(cached_field->second->heuristic_type());
735      field->set_server_type(cached_field->second->server_type());
736    }
737  }
738
739  UpdateAutofillCount();
740
741  // The form signature should match between query and upload requests to the
742  // server. On many websites, form elements are dynamically added, removed, or
743  // rearranged via JavaScript between page load and form submission, so we
744  // copy over the |form_signature_field_names_| corresponding to the query
745  // request.
746  DCHECK_EQ(cached_form.form_name_, form_name_);
747  DCHECK_EQ(cached_form.source_url_, source_url_);
748  DCHECK_EQ(cached_form.target_url_, target_url_);
749  form_signature_field_names_ = cached_form.form_signature_field_names_;
750}
751
752void FormStructure::LogQualityMetrics(
753    const AutofillMetrics& metric_logger,
754    const base::TimeTicks& load_time,
755    const base::TimeTicks& interaction_time,
756    const base::TimeTicks& submission_time) const {
757  size_t num_detected_field_types = 0;
758  bool did_autofill_all_possible_fields = true;
759  bool did_autofill_some_possible_fields = false;
760  for (size_t i = 0; i < field_count(); ++i) {
761    const AutofillField* field = this->field(i);
762
763    // No further logging for empty fields nor for fields where the entered data
764    // does not appear to already exist in the user's stored Autofill data.
765    const ServerFieldTypeSet& field_types = field->possible_types();
766    DCHECK(!field_types.empty());
767    if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
768      continue;
769
770    // Similarly, no further logging for password fields.  Those are primarily
771    // related to a different feature code path, and so make more sense to track
772    // outside of this metric.
773    if (field->form_control_type == "password")
774      continue;
775
776    ++num_detected_field_types;
777    if (field->is_autofilled)
778      did_autofill_some_possible_fields = true;
779    else
780      did_autofill_all_possible_fields = false;
781
782    // Collapse field types that Chrome treats as identical, e.g. home and
783    // billing address fields.
784    ServerFieldTypeSet collapsed_field_types;
785    for (ServerFieldTypeSet::const_iterator it = field_types.begin();
786         it != field_types.end();
787         ++it) {
788      // Since we currently only support US phone numbers, the (city code + main
789      // digits) number is almost always identical to the whole phone number.
790      // TODO(isherman): Improve this logic once we add support for
791      // international numbers.
792      if (*it == PHONE_HOME_CITY_AND_NUMBER)
793        collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
794      else
795        collapsed_field_types.insert(AutofillType(*it).GetStorableType());
796    }
797
798    // Capture the field's type, if it is unambiguous.
799    ServerFieldType field_type = UNKNOWN_TYPE;
800    if (collapsed_field_types.size() == 1)
801      field_type = *collapsed_field_types.begin();
802
803    ServerFieldType heuristic_type =
804        AutofillType(field->heuristic_type()).GetStorableType();
805    ServerFieldType server_type =
806        AutofillType(field->server_type()).GetStorableType();
807    ServerFieldType predicted_type = field->Type().GetStorableType();
808
809    // Log heuristic, server, and overall type quality metrics, independently of
810    // whether the field was autofilled.
811    if (heuristic_type == UNKNOWN_TYPE) {
812      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
813                                               field_type);
814    } else if (field_types.count(heuristic_type)) {
815      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH,
816                                               field_type);
817    } else {
818      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH,
819                                               field_type);
820    }
821
822    if (server_type == NO_SERVER_DATA) {
823      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
824                                            field_type);
825    } else if (field_types.count(server_type)) {
826      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH,
827                                            field_type);
828    } else {
829      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH,
830                                            field_type);
831    }
832
833    if (predicted_type == UNKNOWN_TYPE) {
834      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
835                                             field_type);
836    } else if (field_types.count(predicted_type)) {
837      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH,
838                                             field_type);
839    } else {
840      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH,
841                                             field_type);
842    }
843  }
844
845  if (num_detected_field_types < kRequiredAutofillFields) {
846    metric_logger.LogUserHappinessMetric(
847        AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM);
848  } else {
849    if (did_autofill_all_possible_fields) {
850      metric_logger.LogUserHappinessMetric(
851          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL);
852    } else if (did_autofill_some_possible_fields) {
853      metric_logger.LogUserHappinessMetric(
854          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME);
855    } else {
856      metric_logger.LogUserHappinessMetric(
857          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE);
858    }
859
860    // Unlike the other times, the |submission_time| should always be available.
861    DCHECK(!submission_time.is_null());
862
863    // The |load_time| might be unset, in the case that the form was dynamically
864    // added to the DOM.
865    if (!load_time.is_null()) {
866      // Submission should always chronologically follow form load.
867      DCHECK(submission_time > load_time);
868      base::TimeDelta elapsed = submission_time - load_time;
869      if (did_autofill_some_possible_fields)
870        metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed);
871      else
872        metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed);
873    }
874
875    // The |interaction_time| might be unset, in the case that the user
876    // submitted a blank form.
877    if (!interaction_time.is_null()) {
878      // Submission should always chronologically follow interaction.
879      DCHECK(submission_time > interaction_time);
880      base::TimeDelta elapsed = submission_time - interaction_time;
881      if (did_autofill_some_possible_fields) {
882        metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed);
883      } else {
884        metric_logger.LogFormFillDurationFromInteractionWithoutAutofill(
885            elapsed);
886      }
887    }
888  }
889}
890
891const AutofillField* FormStructure::field(size_t index) const {
892  if (index >= fields_.size()) {
893    NOTREACHED();
894    return NULL;
895  }
896
897  return fields_[index];
898}
899
900AutofillField* FormStructure::field(size_t index) {
901  return const_cast<AutofillField*>(
902      static_cast<const FormStructure*>(this)->field(index));
903}
904
905size_t FormStructure::field_count() const {
906  return fields_.size();
907}
908
909size_t FormStructure::active_field_count() const {
910  return active_field_count_;
911}
912
913FormData FormStructure::ToFormData() const {
914  // |data.user_submitted| will always be false.
915  FormData data;
916  data.name = form_name_;
917  data.origin = source_url_;
918  data.action = target_url_;
919
920  for (size_t i = 0; i < fields_.size(); ++i) {
921    data.fields.push_back(FormFieldData(*fields_[i]));
922  }
923
924  return data;
925}
926
927bool FormStructure::operator==(const FormData& form) const {
928  // TODO(jhawkins): Is this enough to differentiate a form?
929  if (form_name_ == form.name &&
930      source_url_ == form.origin &&
931      target_url_ == form.action) {
932    return true;
933  }
934
935  // TODO(jhawkins): Compare field names, IDs and labels once we have labels
936  // set up.
937
938  return false;
939}
940
941bool FormStructure::operator!=(const FormData& form) const {
942  return !operator==(form);
943}
944
945std::string FormStructure::Hash64Bit(const std::string& str) {
946  std::string hash_bin = base::SHA1HashString(str);
947  DCHECK_EQ(20U, hash_bin.length());
948
949  uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
950                  (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
951                  (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
952                  (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
953                  (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
954                  (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
955                  (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
956                   ((static_cast<uint64>(hash_bin[7])) & 0xFF);
957
958  return base::Uint64ToString(hash64);
959}
960
961bool FormStructure::EncodeFormRequest(
962    FormStructure::EncodeRequestType request_type,
963    buzz::XmlElement* encompassing_xml_element) const {
964  if (!field_count())  // Nothing to add.
965    return false;
966
967  // Some badly formatted web sites repeat fields - limit number of fields to
968  // 48, which is far larger than any valid form and XML still fits into 2K.
969  // Do not send requests for forms with more than this many fields, as they are
970  // near certainly not valid/auto-fillable.
971  const size_t kMaxFieldsOnTheForm = 48;
972  if (field_count() > kMaxFieldsOnTheForm)
973    return false;
974
975  // Add the child nodes for the form fields.
976  for (size_t index = 0; index < field_count(); ++index) {
977    const AutofillField* field = fields_[index];
978    switch (request_type) {
979      case FormStructure::UPLOAD:
980        EncodeFieldForUpload(*field, encompassing_xml_element);
981        break;
982      case FormStructure::QUERY:
983        if (ShouldSkipField(*field))
984          continue;
985        EncodeFieldForQuery(*field, encompassing_xml_element);
986        break;
987      case FormStructure::FIELD_ASSIGNMENTS:
988        EncodeFieldForFieldAssignments(*field, encompassing_xml_element);
989        break;
990    }
991  }
992  return true;
993}
994
995void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
996    bool* found_types,
997    bool* found_sections) {
998  const std::string kDefaultSection = "-default";
999
1000  *found_types = false;
1001  *found_sections = false;
1002  for (std::vector<AutofillField*>::iterator it = fields_.begin();
1003       it != fields_.end(); ++it) {
1004    AutofillField* field = *it;
1005
1006    // To prevent potential section name collisions, add a default suffix for
1007    // other fields.  Without this, 'autocomplete' attribute values
1008    // "section--shipping street-address" and "shipping street-address" would be
1009    // parsed identically, given the section handling code below.  We do this
1010    // before any validation so that fields with invalid attributes still end up
1011    // in the default section.  These default section names will be overridden
1012    // by subsequent heuristic parsing steps if there are no author-specified
1013    // section names.
1014    field->set_section(kDefaultSection);
1015
1016    // Canonicalize the attribute value by trimming whitespace, collapsing
1017    // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1018    std::string autocomplete_attribute =
1019        base::CollapseWhitespaceASCII(field->autocomplete_attribute, false);
1020    autocomplete_attribute = StringToLowerASCII(autocomplete_attribute);
1021
1022    // The autocomplete attribute is overloaded: it can specify either a field
1023    // type hint or whether autocomplete should be enabled at all.  Ignore the
1024    // latter type of attribute value.
1025    if (autocomplete_attribute.empty() ||
1026        autocomplete_attribute == "on" ||
1027        autocomplete_attribute == "off") {
1028      continue;
1029    }
1030
1031    // Any other value, even it is invalid, is considered to be a type hint.
1032    // This allows a website's author to specify an attribute like
1033    // autocomplete="other" on a field to disable all Autofill heuristics for
1034    // the form.
1035    *found_types = true;
1036
1037    // Tokenize the attribute value.  Per the spec, the tokens are parsed in
1038    // reverse order.
1039    std::vector<std::string> tokens;
1040    Tokenize(autocomplete_attribute, " ", &tokens);
1041
1042    // The final token must be the field type.
1043    // If it is not one of the known types, abort.
1044    DCHECK(!tokens.empty());
1045    std::string field_type_token = tokens.back();
1046    tokens.pop_back();
1047    HtmlFieldType field_type =
1048        FieldTypeFromAutocompleteAttributeValue(field_type_token, *field);
1049    if (field_type == HTML_TYPE_UNKNOWN)
1050      continue;
1051
1052    // The preceding token, if any, may be a type hint.
1053    if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1054      // If it is, it must match the field type; otherwise, abort.
1055      // Note that an invalid token invalidates the entire attribute value, even
1056      // if the other tokens are valid.
1057      if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1058        continue;
1059
1060      // Chrome Autofill ignores these type hints.
1061      tokens.pop_back();
1062    }
1063
1064    // The preceding token, if any, may be a fixed string that is either
1065    // "shipping" or "billing".  Chrome Autofill treats these as implicit
1066    // section name suffixes.
1067    DCHECK_EQ(kDefaultSection, field->section());
1068    std::string section = field->section();
1069    HtmlFieldMode mode = HTML_MODE_NONE;
1070    if (!tokens.empty()) {
1071      if (tokens.back() == kShippingMode)
1072        mode = HTML_MODE_SHIPPING;
1073      else if (tokens.back() == kBillingMode)
1074        mode = HTML_MODE_BILLING;
1075    }
1076
1077    if (mode != HTML_MODE_NONE) {
1078      section = "-" + tokens.back();
1079      tokens.pop_back();
1080    }
1081
1082    // The preceding token, if any, may be a named section.
1083    const std::string kSectionPrefix = "section-";
1084    if (!tokens.empty() &&
1085        StartsWithASCII(tokens.back(), kSectionPrefix, true)) {
1086      // Prepend this section name to the suffix set in the preceding block.
1087      section = tokens.back().substr(kSectionPrefix.size()) + section;
1088      tokens.pop_back();
1089    }
1090
1091    // No other tokens are allowed.  If there are any remaining, abort.
1092    if (!tokens.empty())
1093      continue;
1094
1095    if (section != kDefaultSection) {
1096      *found_sections = true;
1097      field->set_section(section);
1098    }
1099
1100    // No errors encountered while parsing!
1101    // Update the |field|'s type based on what was parsed from the attribute.
1102    field->SetHtmlType(field_type, mode);
1103  }
1104}
1105
1106bool FormStructure::FillFields(
1107    const std::vector<ServerFieldType>& types,
1108    const InputFieldComparator& matches,
1109    const base::Callback<base::string16(const AutofillType&)>& get_info,
1110    const std::string& app_locale) {
1111  bool filled_something = false;
1112  for (size_t i = 0; i < field_count(); ++i) {
1113    for (size_t j = 0; j < types.size(); ++j) {
1114      if (matches.Run(types[j], *field(i))) {
1115        AutofillField::FillFormField(*field(i),
1116                                     get_info.Run(field(i)->Type()),
1117                                     app_locale,
1118                                     field(i));
1119        filled_something = true;
1120        break;
1121      }
1122    }
1123  }
1124  return filled_something;
1125}
1126
1127std::set<base::string16> FormStructure::PossibleValues(ServerFieldType type) {
1128  std::set<base::string16> values;
1129  AutofillType target_type(type);
1130  for (std::vector<AutofillField*>::iterator iter = fields_.begin();
1131       iter != fields_.end(); ++iter) {
1132    AutofillField* field = *iter;
1133    if (field->Type().GetStorableType() != target_type.GetStorableType() ||
1134        field->Type().group() != target_type.group()) {
1135      continue;
1136    }
1137
1138    // No option values; anything goes.
1139    if (field->option_values.empty())
1140      return std::set<base::string16>();
1141
1142    for (size_t i = 0; i < field->option_values.size(); ++i) {
1143      if (!field->option_values[i].empty())
1144        values.insert(base::i18n::ToUpper(field->option_values[i]));
1145    }
1146
1147    for (size_t i = 0; i < field->option_contents.size(); ++i) {
1148      if (!field->option_contents[i].empty())
1149        values.insert(base::i18n::ToUpper(field->option_contents[i]));
1150    }
1151  }
1152
1153  return values;
1154}
1155
1156base::string16 FormStructure::GetUniqueValue(HtmlFieldType type) const {
1157  base::string16 value;
1158  for (std::vector<AutofillField*>::const_iterator iter = fields_.begin();
1159       iter != fields_.end(); ++iter) {
1160    const AutofillField* field = *iter;
1161    if (field->html_type() != type)
1162      continue;
1163
1164    // More than one value found; abort rather than choosing one arbitrarily.
1165    if (!value.empty() && !field->value.empty())
1166      return base::string16();
1167
1168    value = field->value;
1169  }
1170
1171  return value;
1172}
1173
1174void FormStructure::IdentifySections(bool has_author_specified_sections) {
1175  if (fields_.empty())
1176    return;
1177
1178  if (!has_author_specified_sections) {
1179    // Name sections after the first field in the section.
1180    base::string16 current_section = fields_.front()->unique_name();
1181
1182    // Keep track of the types we've seen in this section.
1183    std::set<ServerFieldType> seen_types;
1184    ServerFieldType previous_type = UNKNOWN_TYPE;
1185
1186    for (std::vector<AutofillField*>::iterator field = fields_.begin();
1187         field != fields_.end(); ++field) {
1188      const ServerFieldType current_type = (*field)->Type().GetStorableType();
1189
1190      bool already_saw_current_type = seen_types.count(current_type) > 0;
1191
1192      // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1193      // evening phone number.  Our phone number detection is also generally a
1194      // little off.  Hence, ignore this field type as a signal here.
1195      if (AutofillType(current_type).group() == PHONE_HOME)
1196        already_saw_current_type = false;
1197
1198      // Some forms have adjacent fields of the same type.  Two common examples:
1199      //  * Forms with two email fields, where the second is meant to "confirm"
1200      //    the first.
1201      //  * Forms with a <select> menu for states in some countries, and a
1202      //    freeform <input> field for states in other countries.  (Usually,
1203      //    only one of these two will be visible for any given choice of
1204      //    country.)
1205      // Generally, adjacent fields of the same type belong in the same logical
1206      // section.
1207      if (current_type == previous_type)
1208        already_saw_current_type = false;
1209
1210      previous_type = current_type;
1211
1212      if (current_type != UNKNOWN_TYPE && already_saw_current_type) {
1213        // We reached the end of a section, so start a new section.
1214        seen_types.clear();
1215        current_section = (*field)->unique_name();
1216      }
1217
1218      seen_types.insert(current_type);
1219      (*field)->set_section(base::UTF16ToUTF8(current_section));
1220    }
1221  }
1222
1223  // Ensure that credit card and address fields are in separate sections.
1224  // This simplifies the section-aware logic in autofill_manager.cc.
1225  for (std::vector<AutofillField*>::iterator field = fields_.begin();
1226       field != fields_.end(); ++field) {
1227    FieldTypeGroup field_type_group = (*field)->Type().group();
1228    if (field_type_group == CREDIT_CARD)
1229      (*field)->set_section((*field)->section() + "-cc");
1230    else
1231      (*field)->set_section((*field)->section() + "-default");
1232  }
1233}
1234
1235}  // namespace autofill
1236