form_structure.cc revision 58537e28ecd584eab876aee8be7156509866d23a
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/core/browser/form_structure.h"
6
7#include <utility>
8
9#include "base/basictypes.h"
10#include "base/command_line.h"
11#include "base/logging.h"
12#include "base/memory/scoped_ptr.h"
13#include "base/sha1.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_util.h"
16#include "base/strings/stringprintf.h"
17#include "base/strings/utf_string_conversions.h"
18#include "base/time/time.h"
19#include "components/autofill/core/browser/autofill_metrics.h"
20#include "components/autofill/core/browser/autofill_type.h"
21#include "components/autofill/core/browser/autofill_xml_parser.h"
22#include "components/autofill/core/browser/field_types.h"
23#include "components/autofill/core/browser/form_field.h"
24#include "components/autofill/core/common/autofill_constants.h"
25#include "components/autofill/core/common/form_data.h"
26#include "components/autofill/core/common/form_data_predictions.h"
27#include "components/autofill/core/common/form_field_data.h"
28#include "components/autofill/core/common/form_field_data_predictions.h"
29#include "third_party/icu/source/i18n/unicode/regex.h"
30#include "third_party/libjingle/source/talk/xmllite/xmlelement.h"
31
32namespace autofill {
33namespace {
34
// Lower-case form method name that the constructor compares against.
const char kFormMethodPost[] = "post";

// Fixed values placed into the XML requests.
const char kAcceptedFeaturesExperiment[] = "e"; // e=experiments
const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";
const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

// XML element names.
const char kXMLElementAutofillQuery[] = "autofillquery";
const char kXMLElementAutofillUpload[] = "autofillupload";
const char kXMLElementFieldAssignments[] = "fieldassignments";
const char kXMLElementField[] = "field";
const char kXMLElementFields[] = "fields";
const char kXMLElementForm[] = "form";

// XML attribute names.
const char kAttributeAcceptedFeatures[] = "accepts";
const char kAttributeAutofillUsed[] = "autofillused";
const char kAttributeAutofillType[] = "autofilltype";
const char kAttributeClientVersion[] = "clientversion";
const char kAttributeDataPresent[] = "datapresent";
const char kAttributeFieldID[] = "fieldid";
const char kAttributeFieldType[] = "fieldtype";
const char kAttributeFormSignature[] = "formsignature";
const char kAttributeName[] = "name";
const char kAttributeSignature[] = "signature";
const char kAttributeUrlprefixSignature[] = "urlprefixsignature";

// Mode tokens (their use is outside the code shown next to these constants).
const char kBillingMode[] = "billing";
const char kShippingMode[] = "shipping";

// Strip away >= 5 consecutive digits.
const char kIgnorePatternInFieldName[] = "\\d{5,}+";
63
64// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
65// |available_field_types| and returns the hex representation as a string.
66std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) {
67  // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
68  // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
69  const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
70
71  // Pack the types in |available_field_types| into |bit_field|.
72  std::vector<uint8> bit_field(kNumBytes, 0);
73  for (ServerFieldTypeSet::const_iterator field_type =
74           available_field_types.begin();
75       field_type != available_field_types.end();
76       ++field_type) {
77    // Set the appropriate bit in the field.  The bit we set is the one
78    // |field_type| % 8 from the left of the byte.
79    const size_t byte = *field_type / 8;
80    const size_t bit = 0x80 >> (*field_type % 8);
81    DCHECK(byte < bit_field.size());
82    bit_field[byte] |= bit;
83  }
84
85  // Discard any trailing zeroes.
86  // If there are no available types, we return the empty string.
87  size_t data_end = bit_field.size();
88  for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
89  }
90
91  // Print all meaningfull bytes into a string.
92  std::string data_presence;
93  data_presence.reserve(data_end * 2 + 1);
94  for (size_t i = 0; i < data_end; ++i) {
95    base::StringAppendF(&data_presence, "%02x", bit_field[i]);
96  }
97
98  return data_presence;
99}
100
101// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
102// in upload xml, and also add them to the parent XmlElement.
103void EncodeFieldForUpload(const AutofillField& field,
104                          buzz::XmlElement* parent) {
105  // Don't upload checkable fields.
106  if (field.is_checkable)
107    return;
108
109  ServerFieldTypeSet types = field.possible_types();
110  // |types| could be empty in unit-tests only.
111  for (ServerFieldTypeSet::iterator field_type = types.begin();
112       field_type != types.end(); ++field_type) {
113    buzz::XmlElement *field_element = new buzz::XmlElement(
114        buzz::QName(kXMLElementField));
115
116    field_element->SetAttr(buzz::QName(kAttributeSignature),
117                           field.FieldSignature());
118    field_element->SetAttr(buzz::QName(kAttributeAutofillType),
119                           base::IntToString(*field_type));
120    parent->AddElement(field_element);
121  }
122}
123
124// Helper for |EncodeFormRequest()| that creates XmlElement for the given field
125// in query xml, and also add it to the parent XmlElement.
126void EncodeFieldForQuery(const AutofillField& field,
127                         buzz::XmlElement* parent) {
128  buzz::XmlElement *field_element = new buzz::XmlElement(
129      buzz::QName(kXMLElementField));
130  field_element->SetAttr(buzz::QName(kAttributeSignature),
131                         field.FieldSignature());
132  parent->AddElement(field_element);
133}
134
135// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
136// in field assignments xml, and also add them to the parent XmlElement.
137void EncodeFieldForFieldAssignments(const AutofillField& field,
138                                    buzz::XmlElement* parent) {
139  ServerFieldTypeSet types = field.possible_types();
140  for (ServerFieldTypeSet::iterator field_type = types.begin();
141       field_type != types.end(); ++field_type) {
142    buzz::XmlElement *field_element = new buzz::XmlElement(
143        buzz::QName(kXMLElementFields));
144
145    field_element->SetAttr(buzz::QName(kAttributeFieldID),
146                           field.FieldSignature());
147    field_element->SetAttr(buzz::QName(kAttributeFieldType),
148                           base::IntToString(*field_type));
149    field_element->SetAttr(buzz::QName(kAttributeName),
150                           UTF16ToUTF8(field.name));
151    parent->AddElement(field_element);
152  }
153}
154
// Returns |true| iff |token| is one of the contact type hints recognized here
// ("home", "work" or "mobile"), per the implementation section of
// http://is.gd/whatwg_autocomplete
// "fax" and "pager" are deliberately not recognized, as Chrome does not
// support filling either type of information.  The comparison is exact and
// case-sensitive.
bool IsContactTypeHint(const std::string& token) {
  if (token == "home")
    return true;

  if (token == "work")
    return true;

  return token == "mobile";
}
162
163// Returns |true| iff the |token| is a type hint appropriate for a field of the
164// given |field_type|, as specified in the implementation section of
165// http://is.gd/whatwg_autocomplete
166bool ContactTypeHintMatchesFieldType(const std::string& token,
167                                     HtmlFieldType field_type) {
168  // The "home" and "work" type hints are only appropriate for email and phone
169  // number field types.
170  if (token == "home" || token == "work") {
171    return field_type == HTML_TYPE_EMAIL ||
172        (field_type >= HTML_TYPE_TEL &&
173         field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX);
174  }
175
176  // The "mobile" type hint is only appropriate for phone number field types.
177  // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
178  // support filling either type of information.
179  if (token == "mobile") {
180    return field_type >= HTML_TYPE_TEL &&
181        field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX;
182  }
183
184  return false;
185}
186
187// Returns the Chrome Autofill-supported field type corresponding to the given
188// |autocomplete_attribute_value|, if there is one, in the context of the given
189// |field|.  Chrome Autofill supports a subset of the field types listed at
190// http://is.gd/whatwg_autocomplete
191HtmlFieldType FieldTypeFromAutocompleteAttributeValue(
192    const std::string& autocomplete_attribute_value,
193    const AutofillField& field) {
194  if (autocomplete_attribute_value == "name")
195    return HTML_TYPE_NAME;
196
197  if (autocomplete_attribute_value == "given-name")
198    return HTML_TYPE_GIVEN_NAME;
199
200  if (autocomplete_attribute_value == "additional-name") {
201    if (field.max_length == 1)
202      return HTML_TYPE_ADDITIONAL_NAME_INITIAL;
203    else
204      return HTML_TYPE_ADDITIONAL_NAME;
205  }
206
207  if (autocomplete_attribute_value == "family-name")
208    return HTML_TYPE_FAMILY_NAME;
209
210  if (autocomplete_attribute_value == "organization")
211    return HTML_TYPE_ORGANIZATION;
212
213  if (autocomplete_attribute_value == "street-address")
214    return HTML_TYPE_STREET_ADDRESS;
215
216  if (autocomplete_attribute_value == "address-line1")
217    return HTML_TYPE_ADDRESS_LINE1;
218
219  if (autocomplete_attribute_value == "address-line2")
220    return HTML_TYPE_ADDRESS_LINE2;
221
222  if (autocomplete_attribute_value == "locality")
223    return HTML_TYPE_LOCALITY;
224
225  if (autocomplete_attribute_value == "region")
226    return HTML_TYPE_REGION;
227
228  if (autocomplete_attribute_value == "country")
229    return HTML_TYPE_COUNTRY_CODE;
230
231  if (autocomplete_attribute_value == "country-name")
232    return HTML_TYPE_COUNTRY_NAME;
233
234  if (autocomplete_attribute_value == "postal-code")
235    return HTML_TYPE_POSTAL_CODE;
236
237  if (autocomplete_attribute_value == "cc-name")
238    return HTML_TYPE_CREDIT_CARD_NAME;
239
240  if (autocomplete_attribute_value == "cc-number")
241    return HTML_TYPE_CREDIT_CARD_NUMBER;
242
243  if (autocomplete_attribute_value == "cc-exp") {
244    if (field.max_length == 5)
245      return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
246    else if (field.max_length == 7)
247      return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
248    else
249      return HTML_TYPE_CREDIT_CARD_EXP;
250  }
251
252  if (autocomplete_attribute_value == "cc-exp-month")
253    return HTML_TYPE_CREDIT_CARD_EXP_MONTH;
254
255  if (autocomplete_attribute_value == "cc-exp-year") {
256    if (field.max_length == 2)
257      return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR;
258    else if (field.max_length == 4)
259      return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR;
260    else
261      return HTML_TYPE_CREDIT_CARD_EXP_YEAR;
262  }
263
264  if (autocomplete_attribute_value == "cc-csc")
265    return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE;
266
267  if (autocomplete_attribute_value == "cc-type")
268    return HTML_TYPE_CREDIT_CARD_TYPE;
269
270  if (autocomplete_attribute_value == "tel")
271    return HTML_TYPE_TEL;
272
273  if (autocomplete_attribute_value == "tel-country-code")
274    return HTML_TYPE_TEL_COUNTRY_CODE;
275
276  if (autocomplete_attribute_value == "tel-national")
277    return HTML_TYPE_TEL_NATIONAL;
278
279  if (autocomplete_attribute_value == "tel-area-code")
280    return HTML_TYPE_TEL_AREA_CODE;
281
282  if (autocomplete_attribute_value == "tel-local")
283    return HTML_TYPE_TEL_LOCAL;
284
285  if (autocomplete_attribute_value == "tel-local-prefix")
286    return HTML_TYPE_TEL_LOCAL_PREFIX;
287
288  if (autocomplete_attribute_value == "tel-local-suffix")
289    return HTML_TYPE_TEL_LOCAL_SUFFIX;
290
291  if (autocomplete_attribute_value == "email")
292    return HTML_TYPE_EMAIL;
293
294  return HTML_TYPE_UNKNOWN;
295}
296
297std::string StripDigitsIfRequired(const base::string16& input) {
298  UErrorCode status = U_ZERO_ERROR;
299  CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern,
300                         (kIgnorePatternInFieldName));
301  CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher,
302                         (icu_pattern, UREGEX_CASE_INSENSITIVE, status));
303  DCHECK_EQ(status, U_ZERO_ERROR);
304
305  icu::UnicodeString icu_input(input.data(), input.length());
306  matcher.reset(icu_input);
307
308  icu::UnicodeString replaced_string = matcher.replaceAll("", status);
309
310  std::string return_string;
311  status = U_ZERO_ERROR;
312  UTF16ToUTF8(replaced_string.getBuffer(),
313              static_cast<size_t>(replaced_string.length()),
314              &return_string);
315  if (status != U_ZERO_ERROR) {
316    DVLOG(1) << "Couldn't strip digits in " << UTF16ToUTF8(input);
317    return UTF16ToUTF8(input);
318  }
319
320  return return_string;
321}
322
323}  // namespace
324
325FormStructure::FormStructure(const FormData& form)
326    : form_name_(form.name),
327      source_url_(form.origin),
328      target_url_(form.action),
329      autofill_count_(0),
330      active_field_count_(0),
331      upload_required_(USE_UPLOAD_RATES),
332      server_experiment_id_("no server response"),
333      has_author_specified_types_(false) {
334  // Copy the form fields.
335  std::map<base::string16, size_t> unique_names;
336  for (std::vector<FormFieldData>::const_iterator field =
337           form.fields.begin();
338       field != form.fields.end(); field++) {
339
340    if (!ShouldSkipField(*field)) {
341      // Add all supported form fields (including with empty names) to the
342      // signature.  This is a requirement for Autofill servers.
343      form_signature_field_names_.append("&");
344      form_signature_field_names_.append(StripDigitsIfRequired(field->name));
345
346      ++active_field_count_;
347    }
348
349    // Generate a unique name for this field by appending a counter to the name.
350    // Make sure to prepend the counter with a non-numeric digit so that we are
351    // guaranteed to avoid collisions.
352    if (!unique_names.count(field->name))
353      unique_names[field->name] = 1;
354    else
355      ++unique_names[field->name];
356    base::string16 unique_name = field->name + ASCIIToUTF16("_") +
357        base::IntToString16(unique_names[field->name]);
358    fields_.push_back(new AutofillField(*field, unique_name));
359  }
360
361  std::string method = UTF16ToUTF8(form.method);
362  if (StringToLowerASCII(method) == kFormMethodPost) {
363    method_ = POST;
364  } else {
365    // Either the method is 'get', or we don't know.  In this case we default
366    // to GET.
367    method_ = GET;
368  }
369}
370
371FormStructure::~FormStructure() {}
372
373void FormStructure::DetermineHeuristicTypes(
374    const AutofillMetrics& metric_logger) {
375  // First, try to detect field types based on each field's |autocomplete|
376  // attribute value.  If there is at least one form field that specifies an
377  // autocomplete type hint, don't try to apply other heuristics to match fields
378  // in this form.
379  bool has_author_specified_sections;
380  ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_,
381                                            &has_author_specified_sections);
382
383  if (!has_author_specified_types_) {
384    ServerFieldTypeMap field_type_map;
385    FormField::ParseFormFields(fields_.get(), &field_type_map);
386    for (size_t i = 0; i < field_count(); ++i) {
387      AutofillField* field = fields_[i];
388      ServerFieldTypeMap::iterator iter =
389          field_type_map.find(field->unique_name());
390      if (iter != field_type_map.end())
391        field->set_heuristic_type(iter->second);
392    }
393  }
394
395  UpdateAutofillCount();
396  IdentifySections(has_author_specified_sections);
397
398  if (IsAutofillable(true)) {
399    metric_logger.LogDeveloperEngagementMetric(
400        AutofillMetrics::FILLABLE_FORM_PARSED);
401    if (has_author_specified_types_) {
402      metric_logger.LogDeveloperEngagementMetric(
403          AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS);
404    }
405  }
406}
407
408bool FormStructure::EncodeUploadRequest(
409    const ServerFieldTypeSet& available_field_types,
410    bool form_was_autofilled,
411    std::string* encoded_xml) const {
412  DCHECK(ShouldBeCrowdsourced());
413
414  // Verify that |available_field_types| agrees with the possible field types we
415  // are uploading.
416  for (std::vector<AutofillField*>::const_iterator field = begin();
417       field != end();
418       ++field) {
419    for (ServerFieldTypeSet::const_iterator type =
420             (*field)->possible_types().begin();
421         type != (*field)->possible_types().end();
422         ++type) {
423      DCHECK(*type == UNKNOWN_TYPE ||
424             *type == EMPTY_TYPE ||
425             available_field_types.count(*type));
426    }
427  }
428
429  // Set up the <autofillupload> element and its attributes.
430  buzz::XmlElement autofill_request_xml(
431      (buzz::QName(kXMLElementAutofillUpload)));
432  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
433                               kClientVersion);
434  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
435                               FormSignature());
436  autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
437                               form_was_autofilled ? "true" : "false");
438  autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
439                               EncodeFieldTypes(available_field_types).c_str());
440
441  if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
442    return false;  // Malformed form, skip it.
443
444  // Obtain the XML structure as a string.
445  *encoded_xml = kXMLDeclaration;
446  *encoded_xml += autofill_request_xml.Str().c_str();
447
448  // To enable this logging, run with the flag --vmodule="form_structure=2".
449  VLOG(2) << "\n" << *encoded_xml;
450
451  return true;
452}
453
454bool FormStructure::EncodeFieldAssignments(
455    const ServerFieldTypeSet& available_field_types,
456    std::string* encoded_xml) const {
457  DCHECK(ShouldBeCrowdsourced());
458
459  // Set up the <fieldassignments> element and its attributes.
460  buzz::XmlElement autofill_request_xml(
461      (buzz::QName(kXMLElementFieldAssignments)));
462  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
463                               FormSignature());
464
465  if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS,
466                         &autofill_request_xml))
467    return false;  // Malformed form, skip it.
468
469  // Obtain the XML structure as a string.
470  *encoded_xml = kXMLDeclaration;
471  *encoded_xml += autofill_request_xml.Str().c_str();
472
473  return true;
474}
475
476// static
477bool FormStructure::EncodeQueryRequest(
478    const std::vector<FormStructure*>& forms,
479    std::vector<std::string>* encoded_signatures,
480    std::string* encoded_xml) {
481  DCHECK(encoded_signatures);
482  DCHECK(encoded_xml);
483  encoded_xml->clear();
484  encoded_signatures->clear();
485  encoded_signatures->reserve(forms.size());
486
487  // Set up the <autofillquery> element and attributes.
488  buzz::XmlElement autofill_request_xml(
489      (buzz::QName(kXMLElementAutofillQuery)));
490  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
491                               kClientVersion);
492
493  // Some badly formatted web sites repeat forms - detect that and encode only
494  // one form as returned data would be the same for all the repeated forms.
495  std::set<std::string> processed_forms;
496  for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
497       it != forms.end();
498       ++it) {
499    std::string signature((*it)->FormSignature());
500    if (processed_forms.find(signature) != processed_forms.end())
501      continue;
502    processed_forms.insert(signature);
503    scoped_ptr<buzz::XmlElement> encompassing_xml_element(
504        new buzz::XmlElement(buzz::QName(kXMLElementForm)));
505    encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
506                                      signature);
507
508    if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
509                                  encompassing_xml_element.get()))
510      continue;  // Malformed form, skip it.
511
512    autofill_request_xml.AddElement(encompassing_xml_element.release());
513    encoded_signatures->push_back(signature);
514  }
515
516  if (!encoded_signatures->size())
517    return false;
518
519  autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
520                               kAcceptedFeaturesExperiment);
521
522  // Obtain the XML structure as a string.
523  *encoded_xml = kXMLDeclaration;
524  *encoded_xml += autofill_request_xml.Str().c_str();
525
526  return true;
527}
528
529// static
530void FormStructure::ParseQueryResponse(
531    const std::string& response_xml,
532    const std::vector<FormStructure*>& forms,
533    const AutofillMetrics& metric_logger) {
534  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED);
535
536  // Parse the field types from the server response to the query.
537  std::vector<AutofillServerFieldInfo> field_infos;
538  UploadRequired upload_required;
539  std::string experiment_id;
540  AutofillQueryXmlParser parse_handler(&field_infos,
541                                       &upload_required,
542                                       &experiment_id);
543  buzz::XmlParser parser(&parse_handler);
544  parser.Parse(response_xml.c_str(), response_xml.length(), true);
545  if (!parse_handler.succeeded())
546    return;
547
548  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
549  metric_logger.LogServerExperimentIdForQuery(experiment_id);
550
551  bool heuristics_detected_fillable_field = false;
552  bool query_response_overrode_heuristics = false;
553
554  // Copy the field types into the actual form.
555  std::vector<AutofillServerFieldInfo>::iterator current_info =
556      field_infos.begin();
557  for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
558       iter != forms.end(); ++iter) {
559    FormStructure* form = *iter;
560    form->upload_required_ = upload_required;
561    form->server_experiment_id_ = experiment_id;
562
563    for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
564         field != form->fields_.end(); ++field) {
565      if (form->ShouldSkipField(**field))
566        continue;
567
568      // In some cases *successful* response does not return all the fields.
569      // Quit the update of the types then.
570      if (current_info == field_infos.end())
571        break;
572
573      // UNKNOWN_TYPE is reserved for use by the client.
574      DCHECK_NE(current_info->field_type, UNKNOWN_TYPE);
575
576      ServerFieldType heuristic_type = (*field)->heuristic_type();
577      if (heuristic_type != UNKNOWN_TYPE)
578        heuristics_detected_fillable_field = true;
579
580      (*field)->set_server_type(current_info->field_type);
581      if (heuristic_type != (*field)->Type().GetStorableType())
582        query_response_overrode_heuristics = true;
583
584      // Copy default value into the field if available.
585      if (!current_info->default_value.empty())
586        (*field)->set_default_value(current_info->default_value);
587
588      ++current_info;
589    }
590
591    form->UpdateAutofillCount();
592    form->IdentifySections(false);
593  }
594
595  AutofillMetrics::ServerQueryMetric metric;
596  if (query_response_overrode_heuristics) {
597    if (heuristics_detected_fillable_field) {
598      metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
599    } else {
600      metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
601    }
602  } else {
603    metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
604  }
605  metric_logger.LogServerQueryMetric(metric);
606}
607
608// static
609void FormStructure::GetFieldTypePredictions(
610    const std::vector<FormStructure*>& form_structures,
611    std::vector<FormDataPredictions>* forms) {
612  forms->clear();
613  forms->reserve(form_structures.size());
614  for (size_t i = 0; i < form_structures.size(); ++i) {
615    FormStructure* form_structure = form_structures[i];
616    FormDataPredictions form;
617    form.data.name = form_structure->form_name_;
618    form.data.method =
619        ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET");
620    form.data.origin = form_structure->source_url_;
621    form.data.action = form_structure->target_url_;
622    form.signature = form_structure->FormSignature();
623    form.experiment_id = form_structure->server_experiment_id_;
624
625    for (std::vector<AutofillField*>::const_iterator field =
626             form_structure->fields_.begin();
627         field != form_structure->fields_.end(); ++field) {
628      form.data.fields.push_back(FormFieldData(**field));
629
630      FormFieldDataPredictions annotated_field;
631      annotated_field.signature = (*field)->FieldSignature();
632      annotated_field.heuristic_type =
633          AutofillType((*field)->heuristic_type()).ToString();
634      annotated_field.server_type =
635          AutofillType((*field)->server_type()).ToString();
636      annotated_field.overall_type = (*field)->Type().ToString();
637      form.fields.push_back(annotated_field);
638    }
639
640    forms->push_back(form);
641  }
642}
643
644std::string FormStructure::FormSignature() const {
645  std::string scheme(target_url_.scheme());
646  std::string host(target_url_.host());
647
648  // If target host or scheme is empty, set scheme and host of source url.
649  // This is done to match the Toolbar's behavior.
650  if (scheme.empty() || host.empty()) {
651    scheme = source_url_.scheme();
652    host = source_url_.host();
653  }
654
655  std::string form_string = scheme + "://" + host + "&" +
656                            UTF16ToUTF8(form_name_) +
657                            form_signature_field_names_;
658
659  return Hash64Bit(form_string);
660}
661
662bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
663  return field.is_checkable;
664}
665
666bool FormStructure::IsAutofillable(bool require_method_post) const {
667  if (autofill_count() < kRequiredAutofillFields)
668    return false;
669
670  return ShouldBeParsed(require_method_post);
671}
672
673void FormStructure::UpdateAutofillCount() {
674  autofill_count_ = 0;
675  for (std::vector<AutofillField*>::const_iterator iter = begin();
676       iter != end(); ++iter) {
677    AutofillField* field = *iter;
678    if (field && field->IsFieldFillable())
679      ++autofill_count_;
680  }
681}
682
683bool FormStructure::ShouldBeParsed(bool require_method_post) const {
684  if (active_field_count() < kRequiredAutofillFields)
685    return false;
686
687  // Rule out http(s)://*/search?...
688  //  e.g. http://www.google.com/search?q=...
689  //       http://search.yahoo.com/search?p=...
690  if (target_url_.path() == "/search")
691    return false;
692
693  bool has_text_field = false;
694  for (std::vector<AutofillField*>::const_iterator it = begin();
695       it != end() && !has_text_field; ++it) {
696    has_text_field |= (*it)->form_control_type != "select-one";
697  }
698  if (!has_text_field)
699    return false;
700
701  return !require_method_post || (method_ == POST);
702}
703
704bool FormStructure::ShouldBeCrowdsourced() const {
705  return !has_author_specified_types_ && ShouldBeParsed(true);
706}
707
708void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
709  // Map from field signatures to cached fields.
710  std::map<std::string, const AutofillField*> cached_fields;
711  for (size_t i = 0; i < cached_form.field_count(); ++i) {
712    const AutofillField* field = cached_form.field(i);
713    cached_fields[field->FieldSignature()] = field;
714  }
715
716  for (std::vector<AutofillField*>::const_iterator iter = begin();
717       iter != end(); ++iter) {
718    AutofillField* field = *iter;
719
720    std::map<std::string, const AutofillField*>::const_iterator
721        cached_field = cached_fields.find(field->FieldSignature());
722    if (cached_field != cached_fields.end()) {
723      if (field->form_control_type != "select-one" &&
724          field->value == cached_field->second->value) {
725        // From the perspective of learning user data, text fields containing
726        // default values are equivalent to empty fields.
727        field->value = base::string16();
728      }
729
730      field->set_heuristic_type(cached_field->second->heuristic_type());
731      field->set_server_type(cached_field->second->server_type());
732    }
733  }
734
735  UpdateAutofillCount();
736
737  server_experiment_id_ = cached_form.server_experiment_id();
738
739  // The form signature should match between query and upload requests to the
740  // server. On many websites, form elements are dynamically added, removed, or
741  // rearranged via JavaScript between page load and form submission, so we
742  // copy over the |form_signature_field_names_| corresponding to the query
743  // request.
744  DCHECK_EQ(cached_form.form_name_, form_name_);
745  DCHECK_EQ(cached_form.source_url_, source_url_);
746  DCHECK_EQ(cached_form.target_url_, target_url_);
747  form_signature_field_names_ = cached_form.form_signature_field_names_;
748}
749
750void FormStructure::LogQualityMetrics(
751    const AutofillMetrics& metric_logger,
752    const base::TimeTicks& load_time,
753    const base::TimeTicks& interaction_time,
754    const base::TimeTicks& submission_time) const {
755  std::string experiment_id = server_experiment_id();
756  metric_logger.LogServerExperimentIdForUpload(experiment_id);
757
758  size_t num_detected_field_types = 0;
759  bool did_autofill_all_possible_fields = true;
760  bool did_autofill_some_possible_fields = false;
761  for (size_t i = 0; i < field_count(); ++i) {
762    const AutofillField* field = this->field(i);
763    metric_logger.LogQualityMetric(AutofillMetrics::FIELD_SUBMITTED,
764                                   experiment_id);
765
766    // No further logging for empty fields nor for fields where the entered data
767    // does not appear to already exist in the user's stored Autofill data.
768    const ServerFieldTypeSet& field_types = field->possible_types();
769    DCHECK(!field_types.empty());
770    if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
771      continue;
772
773    ++num_detected_field_types;
774    if (field->is_autofilled)
775      did_autofill_some_possible_fields = true;
776    else
777      did_autofill_all_possible_fields = false;
778
779    // Collapse field types that Chrome treats as identical, e.g. home and
780    // billing address fields.
781    ServerFieldTypeSet collapsed_field_types;
782    for (ServerFieldTypeSet::const_iterator it = field_types.begin();
783         it != field_types.end();
784         ++it) {
785      // Since we currently only support US phone numbers, the (city code + main
786      // digits) number is almost always identical to the whole phone number.
787      // TODO(isherman): Improve this logic once we add support for
788      // international numbers.
789      if (*it == PHONE_HOME_CITY_AND_NUMBER)
790        collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
791      else
792        collapsed_field_types.insert(AutofillType(*it).GetStorableType());
793    }
794
795    // Capture the field's type, if it is unambiguous.
796    ServerFieldType field_type = UNKNOWN_TYPE;
797    if (collapsed_field_types.size() == 1)
798      field_type = *collapsed_field_types.begin();
799
800    ServerFieldType heuristic_type =
801        AutofillType(field->heuristic_type()).GetStorableType();
802    ServerFieldType server_type =
803        AutofillType(field->server_type()).GetStorableType();
804    ServerFieldType predicted_type = field->Type().GetStorableType();
805
806    // Log heuristic, server, and overall type quality metrics, independently of
807    // whether the field was autofilled.
808    if (heuristic_type == UNKNOWN_TYPE) {
809      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
810                                               field_type, experiment_id);
811    } else if (field_types.count(heuristic_type)) {
812      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH,
813                                               field_type, experiment_id);
814    } else {
815      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH,
816                                               field_type, experiment_id);
817    }
818
819    if (server_type == NO_SERVER_DATA) {
820      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
821                                            field_type, experiment_id);
822    } else if (field_types.count(server_type)) {
823      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH,
824                                            field_type, experiment_id);
825    } else {
826      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH,
827                                            field_type, experiment_id);
828    }
829
830    if (predicted_type == UNKNOWN_TYPE) {
831      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
832                                             field_type, experiment_id);
833    } else if (field_types.count(predicted_type)) {
834      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH,
835                                             field_type, experiment_id);
836    } else {
837      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH,
838                                             field_type, experiment_id);
839    }
840
841    // TODO(isherman): <select> fields don't support |is_autofilled()|, so we
842    // have to skip them for the remaining metrics.
843    if (field->form_control_type == "select-one")
844      continue;
845
846    if (field->is_autofilled) {
847      metric_logger.LogQualityMetric(AutofillMetrics::FIELD_AUTOFILLED,
848                                     experiment_id);
849    } else {
850      metric_logger.LogQualityMetric(AutofillMetrics::FIELD_NOT_AUTOFILLED,
851                                     experiment_id);
852
853      if (heuristic_type == UNKNOWN_TYPE) {
854        metric_logger.LogQualityMetric(
855            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN,
856            experiment_id);
857      } else if (field_types.count(heuristic_type)) {
858        metric_logger.LogQualityMetric(
859            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH,
860            experiment_id);
861      } else {
862        metric_logger.LogQualityMetric(
863            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH,
864            experiment_id);
865      }
866
867      if (server_type == NO_SERVER_DATA) {
868        metric_logger.LogQualityMetric(
869            AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN,
870            experiment_id);
871      } else if (field_types.count(server_type)) {
872        metric_logger.LogQualityMetric(
873            AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH,
874            experiment_id);
875      } else {
876        metric_logger.LogQualityMetric(
877            AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH,
878            experiment_id);
879      }
880    }
881  }
882
883  if (num_detected_field_types < kRequiredAutofillFields) {
884    metric_logger.LogUserHappinessMetric(
885        AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM);
886  } else {
887    if (did_autofill_all_possible_fields) {
888      metric_logger.LogUserHappinessMetric(
889          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL);
890    } else if (did_autofill_some_possible_fields) {
891      metric_logger.LogUserHappinessMetric(
892          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME);
893    } else {
894      metric_logger.LogUserHappinessMetric(
895          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE);
896    }
897
898    // Unlike the other times, the |submission_time| should always be available.
899    DCHECK(!submission_time.is_null());
900
901    // The |load_time| might be unset, in the case that the form was dynamically
902    // added to the DOM.
903    if (!load_time.is_null()) {
904      // Submission should always chronologically follow form load.
905      DCHECK(submission_time > load_time);
906      base::TimeDelta elapsed = submission_time - load_time;
907      if (did_autofill_some_possible_fields)
908        metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed);
909      else
910        metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed);
911    }
912
913    // The |interaction_time| might be unset, in the case that the user
914    // submitted a blank form.
915    if (!interaction_time.is_null()) {
916      // Submission should always chronologically follow interaction.
917      DCHECK(submission_time > interaction_time);
918      base::TimeDelta elapsed = submission_time - interaction_time;
919      if (did_autofill_some_possible_fields) {
920        metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed);
921      } else {
922        metric_logger.LogFormFillDurationFromInteractionWithoutAutofill(
923            elapsed);
924      }
925    }
926  }
927}
928
929const AutofillField* FormStructure::field(size_t index) const {
930  if (index >= fields_.size()) {
931    NOTREACHED();
932    return NULL;
933  }
934
935  return fields_[index];
936}
937
938AutofillField* FormStructure::field(size_t index) {
939  return const_cast<AutofillField*>(
940      static_cast<const FormStructure*>(this)->field(index));
941}
942
943size_t FormStructure::field_count() const {
944  return fields_.size();
945}
946
947size_t FormStructure::active_field_count() const {
948  return active_field_count_;
949}
950
951std::string FormStructure::server_experiment_id() const {
952  return server_experiment_id_;
953}
954
955FormData FormStructure::ToFormData() const {
956  // |data.user_submitted| will always be false.
957  FormData data;
958  data.name = form_name_;
959  data.origin = source_url_;
960  data.action = target_url_;
961  data.method = ASCIIToUTF16(method_ == POST ? "POST" : "GET");
962
963  for (size_t i = 0; i < fields_.size(); ++i) {
964    data.fields.push_back(FormFieldData(*fields_[i]));
965  }
966
967  return data;
968}
969
970bool FormStructure::operator==(const FormData& form) const {
971  // TODO(jhawkins): Is this enough to differentiate a form?
972  if (form_name_ == form.name &&
973      source_url_ == form.origin &&
974      target_url_ == form.action) {
975    return true;
976  }
977
978  // TODO(jhawkins): Compare field names, IDs and labels once we have labels
979  // set up.
980
981  return false;
982}
983
984bool FormStructure::operator!=(const FormData& form) const {
985  return !operator==(form);
986}
987
988std::string FormStructure::Hash64Bit(const std::string& str) {
989  std::string hash_bin = base::SHA1HashString(str);
990  DCHECK_EQ(20U, hash_bin.length());
991
992  uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
993                  (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
994                  (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
995                  (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
996                  (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
997                  (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
998                  (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
999                   ((static_cast<uint64>(hash_bin[7])) & 0xFF);
1000
1001  return base::Uint64ToString(hash64);
1002}
1003
1004bool FormStructure::EncodeFormRequest(
1005    FormStructure::EncodeRequestType request_type,
1006    buzz::XmlElement* encompassing_xml_element) const {
1007  if (!field_count())  // Nothing to add.
1008    return false;
1009
1010  // Some badly formatted web sites repeat fields - limit number of fields to
1011  // 48, which is far larger than any valid form and XML still fits into 2K.
1012  // Do not send requests for forms with more than this many fields, as they are
1013  // near certainly not valid/auto-fillable.
1014  const size_t kMaxFieldsOnTheForm = 48;
1015  if (field_count() > kMaxFieldsOnTheForm)
1016    return false;
1017
1018  // Add the child nodes for the form fields.
1019  for (size_t index = 0; index < field_count(); ++index) {
1020    const AutofillField* field = fields_[index];
1021    switch (request_type) {
1022      case FormStructure::UPLOAD:
1023        EncodeFieldForUpload(*field, encompassing_xml_element);
1024        break;
1025      case FormStructure::QUERY:
1026        if (ShouldSkipField(*field))
1027          continue;
1028        EncodeFieldForQuery(*field, encompassing_xml_element);
1029        break;
1030      case FormStructure::FIELD_ASSIGNMENTS:
1031        EncodeFieldForFieldAssignments(*field, encompassing_xml_element);
1032        break;
1033    }
1034  }
1035  return true;
1036}
1037
1038void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
1039    bool* found_types,
1040    bool* found_sections) {
1041  const std::string kDefaultSection = "-default";
1042
1043  *found_types = false;
1044  *found_sections = false;
1045  for (std::vector<AutofillField*>::iterator it = fields_.begin();
1046       it != fields_.end(); ++it) {
1047    AutofillField* field = *it;
1048
1049    // To prevent potential section name collisions, add a default suffix for
1050    // other fields.  Without this, 'autocomplete' attribute values
1051    // "section--shipping street-address" and "shipping street-address" would be
1052    // parsed identically, given the section handling code below.  We do this
1053    // before any validation so that fields with invalid attributes still end up
1054    // in the default section.  These default section names will be overridden
1055    // by subsequent heuristic parsing steps if there are no author-specified
1056    // section names.
1057    field->set_section(kDefaultSection);
1058
1059    // Canonicalize the attribute value by trimming whitespace, collapsing
1060    // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1061    std::string autocomplete_attribute =
1062        CollapseWhitespaceASCII(field->autocomplete_attribute, false);
1063    autocomplete_attribute = StringToLowerASCII(autocomplete_attribute);
1064
1065    // The autocomplete attribute is overloaded: it can specify either a field
1066    // type hint or whether autocomplete should be enabled at all.  Ignore the
1067    // latter type of attribute value.
1068    if (autocomplete_attribute.empty() ||
1069        autocomplete_attribute == "on" ||
1070        autocomplete_attribute == "off") {
1071      continue;
1072    }
1073
1074    // Any other value, even it is invalid, is considered to be a type hint.
1075    // This allows a website's author to specify an attribute like
1076    // autocomplete="other" on a field to disable all Autofill heuristics for
1077    // the form.
1078    *found_types = true;
1079
1080    // Tokenize the attribute value.  Per the spec, the tokens are parsed in
1081    // reverse order.
1082    std::vector<std::string> tokens;
1083    Tokenize(autocomplete_attribute, " ", &tokens);
1084
1085    // The final token must be the field type.
1086    // If it is not one of the known types, abort.
1087    DCHECK(!tokens.empty());
1088    std::string field_type_token = tokens.back();
1089    tokens.pop_back();
1090    HtmlFieldType field_type =
1091        FieldTypeFromAutocompleteAttributeValue(field_type_token, *field);
1092    if (field_type == HTML_TYPE_UNKNOWN)
1093      continue;
1094
1095    // The preceding token, if any, may be a type hint.
1096    if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1097      // If it is, it must match the field type; otherwise, abort.
1098      // Note that an invalid token invalidates the entire attribute value, even
1099      // if the other tokens are valid.
1100      if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1101        continue;
1102
1103      // Chrome Autofill ignores these type hints.
1104      tokens.pop_back();
1105    }
1106
1107    // The preceding token, if any, may be a fixed string that is either
1108    // "shipping" or "billing".  Chrome Autofill treats these as implicit
1109    // section name suffixes.
1110    DCHECK_EQ(kDefaultSection, field->section());
1111    std::string section = field->section();
1112    HtmlFieldMode mode = HTML_MODE_NONE;
1113    if (!tokens.empty()) {
1114      if (tokens.back() == kShippingMode)
1115        mode = HTML_MODE_SHIPPING;
1116      else if (tokens.back() == kBillingMode)
1117        mode = HTML_MODE_BILLING;
1118    }
1119
1120    if (mode != HTML_MODE_NONE) {
1121      section = "-" + tokens.back();
1122      tokens.pop_back();
1123    }
1124
1125    // The preceding token, if any, may be a named section.
1126    const std::string kSectionPrefix = "section-";
1127    if (!tokens.empty() &&
1128        StartsWithASCII(tokens.back(), kSectionPrefix, true)) {
1129      // Prepend this section name to the suffix set in the preceding block.
1130      section = tokens.back().substr(kSectionPrefix.size()) + section;
1131      tokens.pop_back();
1132    }
1133
1134    // No other tokens are allowed.  If there are any remaining, abort.
1135    if (!tokens.empty())
1136      continue;
1137
1138    if (section != kDefaultSection) {
1139      *found_sections = true;
1140      field->set_section(section);
1141    }
1142
1143    // No errors encountered while parsing!
1144    // Update the |field|'s type based on what was parsed from the attribute.
1145    field->SetHtmlType(field_type, mode);
1146  }
1147}
1148
1149void FormStructure::IdentifySections(bool has_author_specified_sections) {
1150  if (fields_.empty())
1151    return;
1152
1153  if (!has_author_specified_sections) {
1154    // Name sections after the first field in the section.
1155    base::string16 current_section = fields_.front()->unique_name();
1156
1157    // Keep track of the types we've seen in this section.
1158    std::set<ServerFieldType> seen_types;
1159    ServerFieldType previous_type = UNKNOWN_TYPE;
1160
1161    for (std::vector<AutofillField*>::iterator field = fields_.begin();
1162         field != fields_.end(); ++field) {
1163      const ServerFieldType current_type = (*field)->Type().GetStorableType();
1164
1165      bool already_saw_current_type = seen_types.count(current_type) > 0;
1166
1167      // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1168      // evening phone number.  Our phone number detection is also generally a
1169      // little off.  Hence, ignore this field type as a signal here.
1170      if (AutofillType(current_type).group() == PHONE_HOME)
1171        already_saw_current_type = false;
1172
1173      // Some forms have adjacent fields of the same type.  Two common examples:
1174      //  * Forms with two email fields, where the second is meant to "confirm"
1175      //    the first.
1176      //  * Forms with a <select> menu for states in some countries, and a
1177      //    freeform <input> field for states in other countries.  (Usually,
1178      //    only one of these two will be visible for any given choice of
1179      //    country.)
1180      // Generally, adjacent fields of the same type belong in the same logical
1181      // section.
1182      if (current_type == previous_type)
1183        already_saw_current_type = false;
1184
1185      previous_type = current_type;
1186
1187      if (current_type != UNKNOWN_TYPE && already_saw_current_type) {
1188        // We reached the end of a section, so start a new section.
1189        seen_types.clear();
1190        current_section = (*field)->unique_name();
1191      }
1192
1193      seen_types.insert(current_type);
1194      (*field)->set_section(UTF16ToUTF8(current_section));
1195    }
1196  }
1197
1198  // Ensure that credit card and address fields are in separate sections.
1199  // This simplifies the section-aware logic in autofill_manager.cc.
1200  for (std::vector<AutofillField*>::iterator field = fields_.begin();
1201       field != fields_.end(); ++field) {
1202    FieldTypeGroup field_type_group = (*field)->Type().group();
1203    if (field_type_group == CREDIT_CARD)
1204      (*field)->set_section((*field)->section() + "-cc");
1205    else
1206      (*field)->set_section((*field)->section() + "-default");
1207  }
1208}
1209
1210}  // namespace autofill
1211