form_structure.cc revision eb525c5499e34cc9c4b825d6d9e75bb07cc06ace
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/core/browser/form_structure.h"
6
7#include <utility>
8
9#include "base/basictypes.h"
10#include "base/command_line.h"
11#include "base/logging.h"
12#include "base/memory/scoped_ptr.h"
13#include "base/sha1.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_util.h"
16#include "base/strings/stringprintf.h"
17#include "base/strings/utf_string_conversions.h"
18#include "base/time/time.h"
19#include "components/autofill/content/browser/autocheckout_page_meta_data.h"
20#include "components/autofill/core/browser/autofill_metrics.h"
21#include "components/autofill/core/browser/autofill_type.h"
22#include "components/autofill/core/browser/autofill_xml_parser.h"
23#include "components/autofill/core/browser/field_types.h"
24#include "components/autofill/core/browser/form_field.h"
25#include "components/autofill/core/common/autofill_constants.h"
26#include "components/autofill/core/common/form_data.h"
27#include "components/autofill/core/common/form_data_predictions.h"
28#include "components/autofill/core/common/form_field_data.h"
29#include "components/autofill/core/common/form_field_data_predictions.h"
30#include "third_party/libjingle/source/talk/xmllite/xmlelement.h"
31
32namespace autofill {
33namespace {
34
// Form method value that selects POST; the form's method is lower-cased before
// being compared against it.
const char kFormMethodPost[] = "post";

// XML element names.
const char kXMLElementAutofillQuery[] = "autofillquery";
const char kXMLElementAutofillUpload[] = "autofillupload";
const char kXMLElementFieldAssignments[] = "fieldassignments";
const char kXMLElementField[] = "field";
const char kXMLElementFields[] = "fields";
const char kXMLElementForm[] = "form";

// XML attribute names.
const char kAttributeAcceptedFeatures[] = "accepts";
const char kAttributeAutofillUsed[] = "autofillused";
const char kAttributeAutofillType[] = "autofilltype";
const char kAttributeClientVersion[] = "clientversion";
const char kAttributeDataPresent[] = "datapresent";
const char kAttributeFieldID[] = "fieldid";
const char kAttributeFieldType[] = "fieldtype";
const char kAttributeFormSignature[] = "formsignature";
const char kAttributeName[] = "name";
const char kAttributeSignature[] = "signature";
const char kAttributeUrlprefixSignature[] = "urlprefixsignature";

// Values written into the attributes above.
const char kAcceptedFeaturesExperiment[] = "e"; // e=experiments
const char kAcceptedFeaturesAutocheckoutExperiment[] = "a,e"; // a=autocheckout
const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";

// Prolog prepended to every encoded XML request.
const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

// Section identifiers.
const char kBillingSection[] = "billing";
const char kShippingSection[] = "shipping";
61
62// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
63// |available_field_types| and returns the hex representation as a string.
64std::string EncodeFieldTypes(const FieldTypeSet& available_field_types) {
65  // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
66  // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
67  const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
68
69  // Pack the types in |available_field_types| into |bit_field|.
70  std::vector<uint8> bit_field(kNumBytes, 0);
71  for (FieldTypeSet::const_iterator field_type = available_field_types.begin();
72       field_type != available_field_types.end();
73       ++field_type) {
74    // Set the appropriate bit in the field.  The bit we set is the one
75    // |field_type| % 8 from the left of the byte.
76    const size_t byte = *field_type / 8;
77    const size_t bit = 0x80 >> (*field_type % 8);
78    DCHECK(byte < bit_field.size());
79    bit_field[byte] |= bit;
80  }
81
82  // Discard any trailing zeroes.
83  // If there are no available types, we return the empty string.
84  size_t data_end = bit_field.size();
85  for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
86  }
87
88  // Print all meaningfull bytes into a string.
89  std::string data_presence;
90  data_presence.reserve(data_end * 2 + 1);
91  for (size_t i = 0; i < data_end; ++i) {
92    base::StringAppendF(&data_presence, "%02x", bit_field[i]);
93  }
94
95  return data_presence;
96}
97
98// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
99// in upload xml, and also add them to the parent XmlElement.
100void EncodeFieldForUpload(const AutofillField& field,
101                          buzz::XmlElement* parent) {
102  // Don't upload checkable fields.
103  if (field.is_checkable)
104    return;
105
106  FieldTypeSet types = field.possible_types();
107  // |types| could be empty in unit-tests only.
108  for (FieldTypeSet::iterator field_type = types.begin();
109       field_type != types.end(); ++field_type) {
110    buzz::XmlElement *field_element = new buzz::XmlElement(
111        buzz::QName(kXMLElementField));
112
113    field_element->SetAttr(buzz::QName(kAttributeSignature),
114                           field.FieldSignature());
115    field_element->SetAttr(buzz::QName(kAttributeAutofillType),
116                           base::IntToString(*field_type));
117    parent->AddElement(field_element);
118  }
119}
120
121// Helper for |EncodeFormRequest()| that creates XmlElement for the given field
122// in query xml, and also add it to the parent XmlElement.
123void EncodeFieldForQuery(const AutofillField& field,
124                         buzz::XmlElement* parent) {
125  buzz::XmlElement *field_element = new buzz::XmlElement(
126      buzz::QName(kXMLElementField));
127  field_element->SetAttr(buzz::QName(kAttributeSignature),
128                         field.FieldSignature());
129  parent->AddElement(field_element);
130}
131
132// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
133// in field assignments xml, and also add them to the parent XmlElement.
134void EncodeFieldForFieldAssignments(const AutofillField& field,
135                                    buzz::XmlElement* parent) {
136  FieldTypeSet types = field.possible_types();
137  for (FieldTypeSet::iterator field_type = types.begin();
138       field_type != types.end(); ++field_type) {
139    buzz::XmlElement *field_element = new buzz::XmlElement(
140        buzz::QName(kXMLElementFields));
141
142    field_element->SetAttr(buzz::QName(kAttributeFieldID),
143                           field.FieldSignature());
144    field_element->SetAttr(buzz::QName(kAttributeFieldType),
145                           base::IntToString(*field_type));
146    field_element->SetAttr(buzz::QName(kAttributeName),
147                           UTF16ToUTF8(field.name));
148    parent->AddElement(field_element);
149  }
150}
151
// Returns |true| iff |token| is one of the contact type hints listed in the
// implementation section of http://is.gd/whatwg_autocomplete that Chrome
// honors.  "fax" and "pager" are deliberately left out, as Chrome does not
// support filling either type of information.
bool IsContactTypeHint(const std::string& token) {
  static const char* const kContactTypeHints[] = { "home", "work", "mobile" };
  const size_t kNumHints =
      sizeof(kContactTypeHints) / sizeof(kContactTypeHints[0]);
  for (size_t i = 0; i < kNumHints; ++i) {
    if (token == kContactTypeHints[i])
      return true;
  }
  return false;
}
159
160// Returns |true| iff the |token| is a type hint appropriate for a field of the
161// given |field_type|, as specified in the implementation section of
162// http://is.gd/whatwg_autocomplete
163bool ContactTypeHintMatchesFieldType(const std::string& token,
164                                     AutofillFieldType field_type) {
165  // The "home" and "work" type hints are only appropriate for email and phone
166  // number field types.
167  if (token == "home" || token == "work") {
168    return field_type == EMAIL_ADDRESS ||
169        (field_type >= PHONE_HOME_NUMBER &&
170         field_type <= PHONE_HOME_WHOLE_NUMBER);
171  }
172
173  // The "mobile" type hint is only appropriate for phone number field types.
174  // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
175  // support filling either type of information.
176  if (token == "mobile") {
177    return field_type >= PHONE_HOME_NUMBER &&
178        field_type <= PHONE_HOME_WHOLE_NUMBER;
179  }
180
181  return false;
182}
183
184// Returns the Chrome Autofill-supported field type corresponding to the given
185// |autocomplete_type|, if there is one, in the context of the given |field|.
186// Chrome Autofill supports a subset of the field types listed at
187// http://is.gd/whatwg_autocomplete
188AutofillFieldType FieldTypeFromAutocompleteType(
189    const std::string& autocomplete_type,
190    const AutofillField& field) {
191  if (autocomplete_type == "name")
192    return NAME_FULL;
193
194  if (autocomplete_type == "given-name")
195    return NAME_FIRST;
196
197  if (autocomplete_type == "additional-name") {
198    if (field.max_length == 1)
199      return NAME_MIDDLE_INITIAL;
200    else
201      return NAME_MIDDLE;
202  }
203
204  if (autocomplete_type == "family-name")
205    return NAME_LAST;
206
207  if (autocomplete_type == "honorific-suffix")
208    return NAME_SUFFIX;
209
210  if (autocomplete_type == "organization")
211    return COMPANY_NAME;
212
213  if (autocomplete_type == "street-address" ||
214      autocomplete_type == "address-line1")
215    return ADDRESS_HOME_LINE1;
216
217  if (autocomplete_type == "address-line2")
218    return ADDRESS_HOME_LINE2;
219
220  if (autocomplete_type == "locality")
221    return ADDRESS_HOME_CITY;
222
223  if (autocomplete_type == "region")
224    return ADDRESS_HOME_STATE;
225
226  if (autocomplete_type == "country")
227    return ADDRESS_HOME_COUNTRY;
228
229  if (autocomplete_type == "postal-code")
230    return ADDRESS_HOME_ZIP;
231
232  if (autocomplete_type == "cc-name")
233    return CREDIT_CARD_NAME;
234
235  if (autocomplete_type == "cc-number")
236    return CREDIT_CARD_NUMBER;
237
238  if (autocomplete_type == "cc-exp") {
239    if (field.max_length == 5)
240      return CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
241    else
242      return CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
243  }
244
245  if (autocomplete_type == "cc-exp-month")
246    return CREDIT_CARD_EXP_MONTH;
247
248  if (autocomplete_type == "cc-exp-year") {
249    if (field.max_length == 2)
250      return CREDIT_CARD_EXP_2_DIGIT_YEAR;
251    else
252      return CREDIT_CARD_EXP_4_DIGIT_YEAR;
253  }
254
255  if (autocomplete_type == "cc-csc")
256    return CREDIT_CARD_VERIFICATION_CODE;
257
258  if (autocomplete_type == "cc-type")
259    return CREDIT_CARD_TYPE;
260
261  if (autocomplete_type == "tel")
262    return PHONE_HOME_WHOLE_NUMBER;
263
264  if (autocomplete_type == "tel-country-code")
265    return PHONE_HOME_COUNTRY_CODE;
266
267  if (autocomplete_type == "tel-national")
268    return PHONE_HOME_CITY_AND_NUMBER;
269
270  if (autocomplete_type == "tel-area-code")
271    return PHONE_HOME_CITY_CODE;
272
273  if (autocomplete_type == "tel-local")
274    return PHONE_HOME_NUMBER;
275
276  if (autocomplete_type == "tel-local-prefix")
277    return PHONE_HOME_NUMBER;
278
279  if (autocomplete_type == "tel-local-suffix")
280    return PHONE_HOME_NUMBER;
281
282  if (autocomplete_type == "email")
283    return EMAIL_ADDRESS;
284
285  return UNKNOWN_TYPE;
286}
287
288}  // namespace
289
290FormStructure::FormStructure(const FormData& form,
291                             const std::string& autocheckout_url_prefix)
292    : form_name_(form.name),
293      source_url_(form.origin),
294      target_url_(form.action),
295      autofill_count_(0),
296      active_field_count_(0),
297      upload_required_(USE_UPLOAD_RATES),
298      server_experiment_id_("no server response"),
299      has_author_specified_types_(false),
300      autocheckout_url_prefix_(autocheckout_url_prefix),
301      filled_by_autocheckout_(false) {
302  // Copy the form fields.
303  std::map<base::string16, size_t> unique_names;
304  for (std::vector<FormFieldData>::const_iterator field =
305           form.fields.begin();
306       field != form.fields.end(); field++) {
307
308    if (!ShouldSkipField(*field)) {
309      // Add all supported form fields (including with empty names) to the
310      // signature.  This is a requirement for Autofill servers.
311      form_signature_field_names_.append("&");
312      form_signature_field_names_.append(UTF16ToUTF8(field->name));
313
314      ++active_field_count_;
315    }
316
317    // Generate a unique name for this field by appending a counter to the name.
318    // Make sure to prepend the counter with a non-numeric digit so that we are
319    // guaranteed to avoid collisions.
320    if (!unique_names.count(field->name))
321      unique_names[field->name] = 1;
322    else
323      ++unique_names[field->name];
324    base::string16 unique_name = field->name + ASCIIToUTF16("_") +
325        base::IntToString16(unique_names[field->name]);
326    fields_.push_back(new AutofillField(*field, unique_name));
327  }
328
329  std::string method = UTF16ToUTF8(form.method);
330  if (StringToLowerASCII(method) == kFormMethodPost) {
331    method_ = POST;
332  } else {
333    // Either the method is 'get', or we don't know.  In this case we default
334    // to GET.
335    method_ = GET;
336  }
337}
338
339FormStructure::~FormStructure() {}
340
341void FormStructure::DetermineHeuristicTypes(
342    const AutofillMetrics& metric_logger) {
343  // First, try to detect field types based on each field's |autocomplete|
344  // attribute value.  If there is at least one form field that specifies an
345  // autocomplete type hint, don't try to apply other heuristics to match fields
346  // in this form.
347  bool has_author_specified_sections;
348  ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_,
349                                            &has_author_specified_sections);
350
351  if (!has_author_specified_types_) {
352    FieldTypeMap field_type_map;
353    FormField::ParseFormFields(fields_.get(), &field_type_map);
354    for (size_t index = 0; index < field_count(); index++) {
355      AutofillField* field = fields_[index];
356      FieldTypeMap::iterator iter = field_type_map.find(field->unique_name());
357      if (iter != field_type_map.end())
358        field->set_heuristic_type(iter->second);
359    }
360  }
361
362  UpdateAutofillCount();
363  IdentifySections(has_author_specified_sections);
364
365  if (IsAutofillable(true)) {
366    metric_logger.LogDeveloperEngagementMetric(
367        AutofillMetrics::FILLABLE_FORM_PARSED);
368    if (has_author_specified_types_) {
369      metric_logger.LogDeveloperEngagementMetric(
370          AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS);
371    }
372  }
373}
374
375bool FormStructure::EncodeUploadRequest(
376    const FieldTypeSet& available_field_types,
377    bool form_was_autofilled,
378    std::string* encoded_xml) const {
379  DCHECK(ShouldBeCrowdsourced());
380
381  // Verify that |available_field_types| agrees with the possible field types we
382  // are uploading.
383  for (std::vector<AutofillField*>::const_iterator field = begin();
384       field != end();
385       ++field) {
386    for (FieldTypeSet::const_iterator type = (*field)->possible_types().begin();
387         type != (*field)->possible_types().end();
388         ++type) {
389      DCHECK(*type == UNKNOWN_TYPE ||
390             *type == EMPTY_TYPE ||
391             available_field_types.count(*type));
392    }
393  }
394
395  // Set up the <autofillupload> element and its attributes.
396  buzz::XmlElement autofill_request_xml(
397      (buzz::QName(kXMLElementAutofillUpload)));
398  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
399                               kClientVersion);
400  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
401                               FormSignature());
402  autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
403                               form_was_autofilled ? "true" : "false");
404  autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
405                               EncodeFieldTypes(available_field_types).c_str());
406
407  if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
408    return false;  // Malformed form, skip it.
409
410  // Obtain the XML structure as a string.
411  *encoded_xml = kXMLDeclaration;
412  *encoded_xml += autofill_request_xml.Str().c_str();
413
414  // To enable this logging, run with the flag --vmodule="form_structure=2".
415  VLOG(2) << "\n" << *encoded_xml;
416
417  return true;
418}
419
420bool FormStructure::EncodeFieldAssignments(
421    const FieldTypeSet& available_field_types,
422    std::string* encoded_xml) const {
423  DCHECK(ShouldBeCrowdsourced());
424
425  // Set up the <fieldassignments> element and its attributes.
426  buzz::XmlElement autofill_request_xml(
427      (buzz::QName(kXMLElementFieldAssignments)));
428  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
429                               FormSignature());
430
431  if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS,
432                         &autofill_request_xml))
433    return false;  // Malformed form, skip it.
434
435  // Obtain the XML structure as a string.
436  *encoded_xml = kXMLDeclaration;
437  *encoded_xml += autofill_request_xml.Str().c_str();
438
439  return true;
440}
441
442// static
443bool FormStructure::EncodeQueryRequest(
444    const std::vector<FormStructure*>& forms,
445    std::vector<std::string>* encoded_signatures,
446    std::string* encoded_xml) {
447  DCHECK(encoded_signatures);
448  DCHECK(encoded_xml);
449  encoded_xml->clear();
450  encoded_signatures->clear();
451  encoded_signatures->reserve(forms.size());
452
453  // Set up the <autofillquery> element and attributes.
454  buzz::XmlElement autofill_request_xml(
455      (buzz::QName(kXMLElementAutofillQuery)));
456  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
457                               kClientVersion);
458
459  // autocheckout_url_prefix tells the Autofill server where the forms in the
460  // request came from, and the the Autofill server checks internal status and
461  // decide to enable Autocheckout or not and may return Autocheckout related
462  // data in the response accordingly.
463  // There is no page/frame level object associated with FormStructure that
464  // we could extract URL prefix from. But, all the forms should come from the
465  // same frame, so they should have the same Autocheckout URL prefix. Thus we
466  // use URL prefix from the first form with Autocheckout enabled.
467  std::string autocheckout_url_prefix;
468
469  // Some badly formatted web sites repeat forms - detect that and encode only
470  // one form as returned data would be the same for all the repeated forms.
471  std::set<std::string> processed_forms;
472  for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
473       it != forms.end();
474       ++it) {
475    std::string signature((*it)->FormSignature());
476    if (processed_forms.find(signature) != processed_forms.end())
477      continue;
478    processed_forms.insert(signature);
479    scoped_ptr<buzz::XmlElement> encompassing_xml_element(
480        new buzz::XmlElement(buzz::QName(kXMLElementForm)));
481    encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
482                                      signature);
483
484    if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
485                                  encompassing_xml_element.get()))
486      continue;  // Malformed form, skip it.
487
488    if ((*it)->IsAutocheckoutEnabled()) {
489      if (autocheckout_url_prefix.empty()) {
490        autocheckout_url_prefix = (*it)->autocheckout_url_prefix_;
491      } else {
492        // Making sure all the forms in the request has the same url_prefix.
493        DCHECK_EQ(autocheckout_url_prefix, (*it)->autocheckout_url_prefix_);
494      }
495    }
496
497    autofill_request_xml.AddElement(encompassing_xml_element.release());
498    encoded_signatures->push_back(signature);
499  }
500
501  if (!encoded_signatures->size())
502    return false;
503
504  if (autocheckout_url_prefix.empty()) {
505    autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
506                                 kAcceptedFeaturesExperiment);
507  } else {
508    autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
509                                 kAcceptedFeaturesAutocheckoutExperiment);
510    autofill_request_xml.SetAttr(buzz::QName(kAttributeUrlprefixSignature),
511                                 Hash64Bit(autocheckout_url_prefix));
512  }
513
514  // Obtain the XML structure as a string.
515  *encoded_xml = kXMLDeclaration;
516  *encoded_xml += autofill_request_xml.Str().c_str();
517
518  return true;
519}
520
521// static
522void FormStructure::ParseQueryResponse(
523    const std::string& response_xml,
524    const std::vector<FormStructure*>& forms,
525    autofill::AutocheckoutPageMetaData* page_meta_data,
526    const AutofillMetrics& metric_logger) {
527  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED);
528
529  // Parse the field types from the server response to the query.
530  std::vector<AutofillServerFieldInfo> field_infos;
531  UploadRequired upload_required;
532  std::string experiment_id;
533  AutofillQueryXmlParser parse_handler(&field_infos,
534                                       &upload_required,
535                                       &experiment_id,
536                                       page_meta_data);
537  buzz::XmlParser parser(&parse_handler);
538  parser.Parse(response_xml.c_str(), response_xml.length(), true);
539  if (!parse_handler.succeeded())
540    return;
541
542  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
543  metric_logger.LogServerExperimentIdForQuery(experiment_id);
544
545  bool heuristics_detected_fillable_field = false;
546  bool query_response_overrode_heuristics = false;
547
548  // Copy the field types into the actual form.
549  std::vector<AutofillServerFieldInfo>::iterator current_info =
550      field_infos.begin();
551  for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
552       iter != forms.end(); ++iter) {
553    FormStructure* form = *iter;
554    form->upload_required_ = upload_required;
555    form->server_experiment_id_ = experiment_id;
556
557    for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
558         field != form->fields_.end(); ++field) {
559      if (form->ShouldSkipField(**field))
560        continue;
561
562      // In some cases *successful* response does not return all the fields.
563      // Quit the update of the types then.
564      if (current_info == field_infos.end())
565        break;
566
567      // UNKNOWN_TYPE is reserved for use by the client.
568      DCHECK_NE(current_info->field_type, UNKNOWN_TYPE);
569
570      AutofillFieldType heuristic_type = (*field)->type();
571      if (heuristic_type != UNKNOWN_TYPE)
572        heuristics_detected_fillable_field = true;
573
574      (*field)->set_server_type(current_info->field_type);
575      if (heuristic_type != (*field)->type())
576        query_response_overrode_heuristics = true;
577
578      // Copy default value into the field if available.
579      if (!current_info->default_value.empty())
580        (*field)->set_default_value(current_info->default_value);
581
582      ++current_info;
583    }
584
585    form->UpdateAutofillCount();
586    form->IdentifySections(false);
587  }
588
589  AutofillMetrics::ServerQueryMetric metric;
590  if (query_response_overrode_heuristics) {
591    if (heuristics_detected_fillable_field) {
592      metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
593    } else {
594      metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
595    }
596  } else {
597    metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
598  }
599  metric_logger.LogServerQueryMetric(metric);
600}
601
602// static
603void FormStructure::GetFieldTypePredictions(
604    const std::vector<FormStructure*>& form_structures,
605    std::vector<FormDataPredictions>* forms) {
606  forms->clear();
607  forms->reserve(form_structures.size());
608  for (size_t i = 0; i < form_structures.size(); ++i) {
609    FormStructure* form_structure = form_structures[i];
610    FormDataPredictions form;
611    form.data.name = form_structure->form_name_;
612    form.data.method =
613        ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET");
614    form.data.origin = form_structure->source_url_;
615    form.data.action = form_structure->target_url_;
616    form.signature = form_structure->FormSignature();
617    form.experiment_id = form_structure->server_experiment_id_;
618
619    for (std::vector<AutofillField*>::const_iterator field =
620             form_structure->fields_.begin();
621         field != form_structure->fields_.end(); ++field) {
622      form.data.fields.push_back(FormFieldData(**field));
623
624      FormFieldDataPredictions annotated_field;
625      annotated_field.signature = (*field)->FieldSignature();
626      annotated_field.heuristic_type =
627          AutofillType::FieldTypeToString((*field)->heuristic_type());
628      annotated_field.server_type =
629          AutofillType::FieldTypeToString((*field)->server_type());
630      annotated_field.overall_type =
631          AutofillType::FieldTypeToString((*field)->type());
632      form.fields.push_back(annotated_field);
633    }
634
635    forms->push_back(form);
636  }
637}
638
639std::string FormStructure::FormSignature() const {
640  std::string scheme(target_url_.scheme());
641  std::string host(target_url_.host());
642
643  // If target host or scheme is empty, set scheme and host of source url.
644  // This is done to match the Toolbar's behavior.
645  if (scheme.empty() || host.empty()) {
646    scheme = source_url_.scheme();
647    host = source_url_.host();
648  }
649
650  std::string form_string = scheme + "://" + host + "&" +
651                            UTF16ToUTF8(form_name_) +
652                            form_signature_field_names_;
653
654  return Hash64Bit(form_string);
655}
656
657bool FormStructure::IsAutocheckoutEnabled() const {
658  return !autocheckout_url_prefix_.empty();
659}
660
661bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
662  return (field.is_checkable || field.form_control_type == "password") &&
663      !IsAutocheckoutEnabled();
664}
665
666size_t FormStructure::RequiredFillableFields() const {
667  return IsAutocheckoutEnabled() ? 0 : kRequiredAutofillFields;
668}
669
670bool FormStructure::IsAutofillable(bool require_method_post) const {
671  if (autofill_count() < RequiredFillableFields())
672    return false;
673
674  return ShouldBeParsed(require_method_post);
675}
676
677void FormStructure::UpdateAutofillCount() {
678  autofill_count_ = 0;
679  for (std::vector<AutofillField*>::const_iterator iter = begin();
680       iter != end(); ++iter) {
681    AutofillField* field = *iter;
682    if (field && field->IsFieldFillable())
683      ++autofill_count_;
684  }
685}
686
687bool FormStructure::ShouldBeParsed(bool require_method_post) const {
688  if (active_field_count() < RequiredFillableFields())
689    return false;
690
691  // Rule out http(s)://*/search?...
692  //  e.g. http://www.google.com/search?q=...
693  //       http://search.yahoo.com/search?p=...
694  if (target_url_.path() == "/search")
695    return false;
696
697  if (!IsAutocheckoutEnabled()) {
698    // Make sure there is at least one text field when Autocheckout is
699    // not enabled.
700    bool has_text_field = false;
701    for (std::vector<AutofillField*>::const_iterator it = begin();
702         it != end() && !has_text_field; ++it) {
703      has_text_field |= (*it)->form_control_type != "select-one";
704    }
705    if (!has_text_field)
706      return false;
707  }
708
709  return !require_method_post || (method_ == POST);
710}
711
712bool FormStructure::ShouldBeCrowdsourced() const {
713  // Allow all forms in Autocheckout flow to be crowdsourced.
714  return (!has_author_specified_types_ && ShouldBeParsed(true)) ||
715      IsAutocheckoutEnabled();
716}
717
718void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
719  // Map from field signatures to cached fields.
720  std::map<std::string, const AutofillField*> cached_fields;
721  for (size_t i = 0; i < cached_form.field_count(); ++i) {
722    const AutofillField* field = cached_form.field(i);
723    cached_fields[field->FieldSignature()] = field;
724  }
725
726  for (std::vector<AutofillField*>::const_iterator iter = begin();
727       iter != end(); ++iter) {
728    AutofillField* field = *iter;
729
730    std::map<std::string, const AutofillField*>::const_iterator
731        cached_field = cached_fields.find(field->FieldSignature());
732    if (cached_field != cached_fields.end()) {
733      if (field->form_control_type != "select-one" &&
734          field->value == cached_field->second->value) {
735        // From the perspective of learning user data, text fields containing
736        // default values are equivalent to empty fields.
737        field->value = base::string16();
738      }
739
740      field->set_heuristic_type(cached_field->second->heuristic_type());
741      field->set_server_type(cached_field->second->server_type());
742    }
743  }
744
745  UpdateAutofillCount();
746
747  filled_by_autocheckout_ = cached_form.filled_by_autocheckout();
748  server_experiment_id_ = cached_form.server_experiment_id();
749
750  // The form signature should match between query and upload requests to the
751  // server. On many websites, form elements are dynamically added, removed, or
752  // rearranged via JavaScript between page load and form submission, so we
753  // copy over the |form_signature_field_names_| corresponding to the query
754  // request.
755  DCHECK_EQ(cached_form.form_name_, form_name_);
756  DCHECK_EQ(cached_form.source_url_, source_url_);
757  DCHECK_EQ(cached_form.target_url_, target_url_);
758  form_signature_field_names_ = cached_form.form_signature_field_names_;
759}
760
761void FormStructure::LogQualityMetrics(
762    const AutofillMetrics& metric_logger,
763    const base::TimeTicks& load_time,
764    const base::TimeTicks& interaction_time,
765    const base::TimeTicks& submission_time) const {
766  std::string experiment_id = server_experiment_id();
767  metric_logger.LogServerExperimentIdForUpload(experiment_id);
768
769  size_t num_detected_field_types = 0;
770  bool did_autofill_all_possible_fields = true;
771  bool did_autofill_some_possible_fields = false;
772  for (size_t i = 0; i < field_count(); ++i) {
773    const AutofillField* field = this->field(i);
774    metric_logger.LogQualityMetric(AutofillMetrics::FIELD_SUBMITTED,
775                                   experiment_id);
776
777    // No further logging for empty fields nor for fields where the entered data
778    // does not appear to already exist in the user's stored Autofill data.
779    const FieldTypeSet& field_types = field->possible_types();
780    DCHECK(!field_types.empty());
781    if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
782      continue;
783
784    ++num_detected_field_types;
785    if (field->is_autofilled)
786      did_autofill_some_possible_fields = true;
787    else
788      did_autofill_all_possible_fields = false;
789
790    // Collapse field types that Chrome treats as identical, e.g. home and
791    // billing address fields.
792    FieldTypeSet collapsed_field_types;
793    for (FieldTypeSet::const_iterator it = field_types.begin();
794         it != field_types.end();
795         ++it) {
796      // Since we currently only support US phone numbers, the (city code + main
797      // digits) number is almost always identical to the whole phone number.
798      // TODO(isherman): Improve this logic once we add support for
799      // international numbers.
800      if (*it == PHONE_HOME_CITY_AND_NUMBER)
801        collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
802      else
803        collapsed_field_types.insert(AutofillType::GetEquivalentFieldType(*it));
804    }
805
806    // Capture the field's type, if it is unambiguous.
807    AutofillFieldType field_type = UNKNOWN_TYPE;
808    if (collapsed_field_types.size() == 1)
809      field_type = *collapsed_field_types.begin();
810
811    AutofillFieldType heuristic_type = field->heuristic_type();
812    AutofillFieldType server_type = field->server_type();
813    AutofillFieldType predicted_type = field->type();
814
815    // Log heuristic, server, and overall type quality metrics, independently of
816    // whether the field was autofilled.
817    if (heuristic_type == UNKNOWN_TYPE) {
818      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
819                                               field_type, experiment_id);
820    } else if (field_types.count(heuristic_type)) {
821      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH,
822                                               field_type, experiment_id);
823    } else {
824      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH,
825                                               field_type, experiment_id);
826    }
827
828    if (server_type == NO_SERVER_DATA) {
829      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
830                                            field_type, experiment_id);
831    } else if (field_types.count(server_type)) {
832      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH,
833                                            field_type, experiment_id);
834    } else {
835      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH,
836                                            field_type, experiment_id);
837    }
838
839    if (predicted_type == UNKNOWN_TYPE) {
840      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
841                                             field_type, experiment_id);
842    } else if (field_types.count(predicted_type)) {
843      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH,
844                                             field_type, experiment_id);
845    } else {
846      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH,
847                                             field_type, experiment_id);
848    }
849
850    // TODO(isherman): <select> fields don't support |is_autofilled()|, so we
851    // have to skip them for the remaining metrics.
852    if (field->form_control_type == "select-one")
853      continue;
854
855    if (field->is_autofilled) {
856      metric_logger.LogQualityMetric(AutofillMetrics::FIELD_AUTOFILLED,
857                                     experiment_id);
858    } else {
859      metric_logger.LogQualityMetric(AutofillMetrics::FIELD_NOT_AUTOFILLED,
860                                     experiment_id);
861
862      if (heuristic_type == UNKNOWN_TYPE) {
863        metric_logger.LogQualityMetric(
864            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN,
865            experiment_id);
866      } else if (field_types.count(heuristic_type)) {
867        metric_logger.LogQualityMetric(
868            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH,
869            experiment_id);
870      } else {
871        metric_logger.LogQualityMetric(
872            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH,
873            experiment_id);
874      }
875
876      if (server_type == NO_SERVER_DATA) {
877        metric_logger.LogQualityMetric(
878            AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN,
879            experiment_id);
880      } else if (field_types.count(server_type)) {
881        metric_logger.LogQualityMetric(
882            AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH,
883            experiment_id);
884      } else {
885        metric_logger.LogQualityMetric(
886            AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH,
887            experiment_id);
888      }
889    }
890  }
891
892  if (num_detected_field_types < RequiredFillableFields()) {
893    metric_logger.LogUserHappinessMetric(
894        AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM);
895  } else {
896    if (did_autofill_all_possible_fields) {
897      metric_logger.LogUserHappinessMetric(
898          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL);
899    } else if (did_autofill_some_possible_fields) {
900      metric_logger.LogUserHappinessMetric(
901          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME);
902    } else {
903      metric_logger.LogUserHappinessMetric(
904          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE);
905    }
906
907    // Unlike the other times, the |submission_time| should always be available.
908    DCHECK(!submission_time.is_null());
909
910    // The |load_time| might be unset, in the case that the form was dynamically
911    // added to the DOM.
912    if (!load_time.is_null()) {
913      // Submission should always chronologically follow form load.
914      DCHECK(submission_time > load_time);
915      base::TimeDelta elapsed = submission_time - load_time;
916      if (did_autofill_some_possible_fields)
917        metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed);
918      else
919        metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed);
920    }
921
922    // The |interaction_time| might be unset, in the case that the user
923    // submitted a blank form.
924    if (!interaction_time.is_null()) {
925      // Submission should always chronologically follow interaction.
926      DCHECK(submission_time > interaction_time);
927      base::TimeDelta elapsed = submission_time - interaction_time;
928      if (did_autofill_some_possible_fields) {
929        metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed);
930      } else {
931        metric_logger.LogFormFillDurationFromInteractionWithoutAutofill(
932            elapsed);
933      }
934    }
935  }
936}
937
938const AutofillField* FormStructure::field(size_t index) const {
939  if (index >= fields_.size()) {
940    NOTREACHED();
941    return NULL;
942  }
943
944  return fields_[index];
945}
946
947AutofillField* FormStructure::field(size_t index) {
948  return const_cast<AutofillField*>(
949      static_cast<const FormStructure*>(this)->field(index));
950}
951
952size_t FormStructure::field_count() const {
953  return fields_.size();
954}
955
956size_t FormStructure::active_field_count() const {
957  return active_field_count_;
958}
959
960std::string FormStructure::server_experiment_id() const {
961  return server_experiment_id_;
962}
963
964FormData FormStructure::ToFormData() const {
965  // |data.user_submitted| will always be false.
966  FormData data;
967  data.name = form_name_;
968  data.origin = source_url_;
969  data.action = target_url_;
970  data.method = ASCIIToUTF16(method_ == POST ? "POST" : "GET");
971
972  for (size_t i = 0; i < fields_.size(); ++i) {
973    data.fields.push_back(FormFieldData(*fields_[i]));
974  }
975
976  return data;
977}
978
979bool FormStructure::operator==(const FormData& form) const {
980  // TODO(jhawkins): Is this enough to differentiate a form?
981  if (form_name_ == form.name &&
982      source_url_ == form.origin &&
983      target_url_ == form.action) {
984    return true;
985  }
986
987  // TODO(jhawkins): Compare field names, IDs and labels once we have labels
988  // set up.
989
990  return false;
991}
992
993bool FormStructure::operator!=(const FormData& form) const {
994  return !operator==(form);
995}
996
997std::string FormStructure::Hash64Bit(const std::string& str) {
998  std::string hash_bin = base::SHA1HashString(str);
999  DCHECK_EQ(20U, hash_bin.length());
1000
1001  uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
1002                  (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
1003                  (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
1004                  (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
1005                  (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
1006                  (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
1007                  (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
1008                   ((static_cast<uint64>(hash_bin[7])) & 0xFF);
1009
1010  return base::Uint64ToString(hash64);
1011}
1012
1013bool FormStructure::EncodeFormRequest(
1014    FormStructure::EncodeRequestType request_type,
1015    buzz::XmlElement* encompassing_xml_element) const {
1016  if (!field_count())  // Nothing to add.
1017    return false;
1018
1019  // Some badly formatted web sites repeat fields - limit number of fields to
1020  // 48, which is far larger than any valid form and XML still fits into 2K.
1021  // Do not send requests for forms with more than this many fields, as they are
1022  // near certainly not valid/auto-fillable.
1023  const size_t kMaxFieldsOnTheForm = 48;
1024  if (field_count() > kMaxFieldsOnTheForm)
1025    return false;
1026
1027  // Add the child nodes for the form fields.
1028  for (size_t index = 0; index < field_count(); ++index) {
1029    const AutofillField* field = fields_[index];
1030    switch (request_type) {
1031      case FormStructure::UPLOAD:
1032        EncodeFieldForUpload(*field, encompassing_xml_element);
1033        break;
1034      case FormStructure::QUERY:
1035        if (ShouldSkipField(*field))
1036          continue;
1037        EncodeFieldForQuery(*field, encompassing_xml_element);
1038        break;
1039      case FormStructure::FIELD_ASSIGNMENTS:
1040        EncodeFieldForFieldAssignments(*field, encompassing_xml_element);
1041        break;
1042    }
1043  }
1044  return true;
1045}
1046
1047void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
1048    bool* found_types,
1049    bool* found_sections) {
1050  const std::string kDefaultSection = "-default";
1051
1052  *found_types = false;
1053  *found_sections = false;
1054  for (std::vector<AutofillField*>::iterator it = fields_.begin();
1055       it != fields_.end(); ++it) {
1056    AutofillField* field = *it;
1057
1058    // To prevent potential section name collisions, add a default suffix for
1059    // other fields.  Without this, 'autocomplete' attribute values
1060    // "section--shipping street-address" and "shipping street-address" would be
1061    // parsed identically, given the section handling code below.  We do this
1062    // before any validation so that fields with invalid attributes still end up
1063    // in the default section.  These default section names will be overridden
1064    // by subsequent heuristic parsing steps if there are no author-specified
1065    // section names.
1066    field->set_section(kDefaultSection);
1067
1068    // Canonicalize the attribute value by trimming whitespace, collapsing
1069    // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1070    std::string autocomplete_attribute =
1071        CollapseWhitespaceASCII(field->autocomplete_attribute, false);
1072    autocomplete_attribute = StringToLowerASCII(autocomplete_attribute);
1073
1074    // The autocomplete attribute is overloaded: it can specify either a field
1075    // type hint or whether autocomplete should be enabled at all.  Ignore the
1076    // latter type of attribute value.
1077    if (autocomplete_attribute.empty() ||
1078        autocomplete_attribute == "on" ||
1079        autocomplete_attribute == "off") {
1080      continue;
1081    }
1082
1083    // Any other value, even it is invalid, is considered to be a type hint.
1084    // This allows a website's author to specify an attribute like
1085    // autocomplete="other" on a field to disable all Autofill heuristics for
1086    // the form.
1087    *found_types = true;
1088
1089    // Tokenize the attribute value.  Per the spec, the tokens are parsed in
1090    // reverse order.
1091    std::vector<std::string> tokens;
1092    Tokenize(autocomplete_attribute, " ", &tokens);
1093
1094    // The final token must be the field type.
1095    // If it is not one of the known types, abort.
1096    DCHECK(!tokens.empty());
1097    std::string field_type_token = tokens.back();
1098    tokens.pop_back();
1099    AutofillFieldType field_type =
1100        FieldTypeFromAutocompleteType(field_type_token, *field);
1101    if (field_type == UNKNOWN_TYPE)
1102      continue;
1103
1104    // The preceding token, if any, may be a type hint.
1105    if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1106      // If it is, it must match the field type; otherwise, abort.
1107      // Note that an invalid token invalidates the entire attribute value, even
1108      // if the other tokens are valid.
1109      if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1110        continue;
1111
1112      // Chrome Autofill ignores these type hints.
1113      tokens.pop_back();
1114    }
1115
1116    // The preceding token, if any, may be a fixed string that is either
1117    // "shipping" or "billing".  Chrome Autofill treats these as implicit
1118    // section name suffixes.
1119    DCHECK_EQ(kDefaultSection, field->section());
1120    std::string section = field->section();
1121    if (!tokens.empty() &&
1122        (tokens.back() == kShippingSection ||
1123         tokens.back() == kBillingSection)) {
1124      // Set Autofill field type to billing if section is billing.
1125      if (tokens.back() == kBillingSection)
1126        field_type = AutofillType::GetEquivalentBillingFieldType(field_type);
1127      section = "-" + tokens.back();
1128      tokens.pop_back();
1129    }
1130
1131    // The preceding token, if any, may be a named section.
1132    const std::string kSectionPrefix = "section-";
1133    if (!tokens.empty() &&
1134        StartsWithASCII(tokens.back(), kSectionPrefix, true)) {
1135      // Prepend this section name to the suffix set in the preceding block.
1136      section = tokens.back().substr(kSectionPrefix.size()) + section;
1137      tokens.pop_back();
1138    }
1139
1140    // No other tokens are allowed.  If there are any remaining, abort.
1141    if (!tokens.empty())
1142      continue;
1143
1144    if (section != kDefaultSection) {
1145      *found_sections = true;
1146      field->set_section(section);
1147    }
1148
1149    // No errors encountered while parsing!
1150    // Update the |field|'s type based on what was parsed from the attribute.
1151    field->set_heuristic_type(field_type);
1152    if (field_type_token == "tel-local-prefix")
1153      field->set_phone_part(AutofillField::PHONE_PREFIX);
1154    else if (field_type_token == "tel-local-suffix")
1155      field->set_phone_part(AutofillField::PHONE_SUFFIX);
1156  }
1157}
1158
1159void FormStructure::IdentifySections(bool has_author_specified_sections) {
1160  if (fields_.empty())
1161    return;
1162
1163  if (!has_author_specified_sections) {
1164    // Name sections after the first field in the section.
1165    base::string16 current_section = fields_.front()->unique_name();
1166
1167    // Keep track of the types we've seen in this section.
1168    std::set<AutofillFieldType> seen_types;
1169    AutofillFieldType previous_type = UNKNOWN_TYPE;
1170
1171    for (std::vector<AutofillField*>::iterator field = fields_.begin();
1172         field != fields_.end(); ++field) {
1173      const AutofillFieldType current_type =
1174          AutofillType::GetEquivalentFieldType((*field)->type());
1175
1176      bool already_saw_current_type = seen_types.count(current_type) > 0;
1177
1178      // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1179      // evening phone number.  Our phone number detection is also generally a
1180      // little off.  Hence, ignore this field type as a signal here.
1181      if (AutofillType(current_type).group() == AutofillType::PHONE_HOME)
1182        already_saw_current_type = false;
1183
1184      // Some forms have adjacent fields of the same type.  Two common examples:
1185      //  * Forms with two email fields, where the second is meant to "confirm"
1186      //    the first.
1187      //  * Forms with a <select> menu for states in some countries, and a
1188      //    freeform <input> field for states in other countries.  (Usually,
1189      //    only one of these two will be visible for any given choice of
1190      //    country.)
1191      // Generally, adjacent fields of the same type belong in the same logical
1192      // section.
1193      if (current_type == previous_type)
1194        already_saw_current_type = false;
1195
1196      previous_type = current_type;
1197
1198      if (current_type != UNKNOWN_TYPE && already_saw_current_type) {
1199        // We reached the end of a section, so start a new section.
1200        seen_types.clear();
1201        current_section = (*field)->unique_name();
1202      }
1203
1204      seen_types.insert(current_type);
1205      (*field)->set_section(UTF16ToUTF8(current_section));
1206    }
1207  }
1208
1209  // Ensure that credit card and address fields are in separate sections.
1210  // This simplifies the section-aware logic in autofill_manager.cc.
1211  for (std::vector<AutofillField*>::iterator field = fields_.begin();
1212       field != fields_.end(); ++field) {
1213    AutofillType::FieldTypeGroup field_type_group =
1214        AutofillType((*field)->type()).group();
1215    if (field_type_group == AutofillType::CREDIT_CARD)
1216      (*field)->set_section((*field)->section() + "-cc");
1217    else
1218      (*field)->set_section((*field)->section() + "-default");
1219  }
1220}
1221
1222}  // namespace autofill
1223