form_structure.cc revision 1e9bf3e0803691d0a228da41fc608347b6db4340
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/core/browser/form_structure.h"
6
7#include <utility>
8
9#include "base/basictypes.h"
10#include "base/command_line.h"
11#include "base/logging.h"
12#include "base/memory/scoped_ptr.h"
13#include "base/sha1.h"
14#include "base/strings/string_number_conversions.h"
15#include "base/strings/string_util.h"
16#include "base/strings/stringprintf.h"
17#include "base/strings/utf_string_conversions.h"
18#include "base/time/time.h"
19#include "components/autofill/core/browser/autofill_metrics.h"
20#include "components/autofill/core/browser/autofill_type.h"
21#include "components/autofill/core/browser/autofill_xml_parser.h"
22#include "components/autofill/core/browser/field_types.h"
23#include "components/autofill/core/browser/form_field.h"
24#include "components/autofill/core/common/autofill_constants.h"
25#include "components/autofill/core/common/form_data.h"
26#include "components/autofill/core/common/form_data_predictions.h"
27#include "components/autofill/core/common/form_field_data.h"
28#include "components/autofill/core/common/form_field_data_predictions.h"
29#include "third_party/icu/source/i18n/unicode/regex.h"
30#include "third_party/libjingle/source/talk/xmllite/xmlelement.h"
31
32namespace autofill {
33namespace {
34
35const char kFormMethodPost[] = "post";
36
37// XML elements and attributes.
38const char kAttributeAcceptedFeatures[] = "accepts";
39const char kAttributeAutofillUsed[] = "autofillused";
40const char kAttributeAutofillType[] = "autofilltype";
41const char kAttributeClientVersion[] = "clientversion";
42const char kAttributeDataPresent[] = "datapresent";
43const char kAttributeFieldID[] = "fieldid";
44const char kAttributeFieldType[] = "fieldtype";
45const char kAttributeFormSignature[] = "formsignature";
46const char kAttributeName[] = "name";
47const char kAttributeSignature[] = "signature";
48const char kAcceptedFeaturesExperiment[] = "e"; // e=experiments
49const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";
50const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
51const char kXMLElementAutofillQuery[] = "autofillquery";
52const char kXMLElementAutofillUpload[] = "autofillupload";
53const char kXMLElementFieldAssignments[] = "fieldassignments";
54const char kXMLElementField[] = "field";
55const char kXMLElementFields[] = "fields";
56const char kXMLElementForm[] = "form";
57const char kBillingMode[] = "billing";
58const char kShippingMode[] = "shipping";
59
// Strip away >= 5 consecutive digits.
61const char kIgnorePatternInFieldName[] = "\\d{5,}+";
62
63// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
64// |available_field_types| and returns the hex representation as a string.
65std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) {
66  // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
67  // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
68  const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
69
70  // Pack the types in |available_field_types| into |bit_field|.
71  std::vector<uint8> bit_field(kNumBytes, 0);
72  for (ServerFieldTypeSet::const_iterator field_type =
73           available_field_types.begin();
74       field_type != available_field_types.end();
75       ++field_type) {
76    // Set the appropriate bit in the field.  The bit we set is the one
77    // |field_type| % 8 from the left of the byte.
78    const size_t byte = *field_type / 8;
79    const size_t bit = 0x80 >> (*field_type % 8);
80    DCHECK(byte < bit_field.size());
81    bit_field[byte] |= bit;
82  }
83
84  // Discard any trailing zeroes.
85  // If there are no available types, we return the empty string.
86  size_t data_end = bit_field.size();
87  for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
88  }
89
90  // Print all meaningfull bytes into a string.
91  std::string data_presence;
92  data_presence.reserve(data_end * 2 + 1);
93  for (size_t i = 0; i < data_end; ++i) {
94    base::StringAppendF(&data_presence, "%02x", bit_field[i]);
95  }
96
97  return data_presence;
98}
99
100// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
101// in upload xml, and also add them to the parent XmlElement.
102void EncodeFieldForUpload(const AutofillField& field,
103                          buzz::XmlElement* parent) {
104  // Don't upload checkable fields.
105  if (field.is_checkable)
106    return;
107
108  ServerFieldTypeSet types = field.possible_types();
109  // |types| could be empty in unit-tests only.
110  for (ServerFieldTypeSet::iterator field_type = types.begin();
111       field_type != types.end(); ++field_type) {
112    buzz::XmlElement *field_element = new buzz::XmlElement(
113        buzz::QName(kXMLElementField));
114
115    field_element->SetAttr(buzz::QName(kAttributeSignature),
116                           field.FieldSignature());
117    field_element->SetAttr(buzz::QName(kAttributeAutofillType),
118                           base::IntToString(*field_type));
119    parent->AddElement(field_element);
120  }
121}
122
123// Helper for |EncodeFormRequest()| that creates XmlElement for the given field
124// in query xml, and also add it to the parent XmlElement.
125void EncodeFieldForQuery(const AutofillField& field,
126                         buzz::XmlElement* parent) {
127  buzz::XmlElement *field_element = new buzz::XmlElement(
128      buzz::QName(kXMLElementField));
129  field_element->SetAttr(buzz::QName(kAttributeSignature),
130                         field.FieldSignature());
131  parent->AddElement(field_element);
132}
133
134// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
135// in field assignments xml, and also add them to the parent XmlElement.
136void EncodeFieldForFieldAssignments(const AutofillField& field,
137                                    buzz::XmlElement* parent) {
138  ServerFieldTypeSet types = field.possible_types();
139  for (ServerFieldTypeSet::iterator field_type = types.begin();
140       field_type != types.end(); ++field_type) {
141    buzz::XmlElement *field_element = new buzz::XmlElement(
142        buzz::QName(kXMLElementFields));
143
144    field_element->SetAttr(buzz::QName(kAttributeFieldID),
145                           field.FieldSignature());
146    field_element->SetAttr(buzz::QName(kAttributeFieldType),
147                           base::IntToString(*field_type));
148    field_element->SetAttr(buzz::QName(kAttributeName),
149                           UTF16ToUTF8(field.name));
150    parent->AddElement(field_element);
151  }
152}
153
// Returns |true| iff |token| is one of the contact-field type hints listed in
// the implementation section of http://is.gd/whatwg_autocomplete that Chrome
// handles.  "fax" and "pager" are deliberately left out, as Chrome does not
// support filling either type of information.
bool IsContactTypeHint(const std::string& token) {
  static const char* const kSupportedHints[] = { "home", "work", "mobile" };
  const size_t kNumHints =
      sizeof(kSupportedHints) / sizeof(kSupportedHints[0]);

  for (size_t i = 0; i < kNumHints; ++i) {
    if (token == kSupportedHints[i])
      return true;
  }
  return false;
}
161
162// Returns |true| iff the |token| is a type hint appropriate for a field of the
163// given |field_type|, as specified in the implementation section of
164// http://is.gd/whatwg_autocomplete
165bool ContactTypeHintMatchesFieldType(const std::string& token,
166                                     HtmlFieldType field_type) {
167  // The "home" and "work" type hints are only appropriate for email and phone
168  // number field types.
169  if (token == "home" || token == "work") {
170    return field_type == HTML_TYPE_EMAIL ||
171        (field_type >= HTML_TYPE_TEL &&
172         field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX);
173  }
174
175  // The "mobile" type hint is only appropriate for phone number field types.
176  // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
177  // support filling either type of information.
178  if (token == "mobile") {
179    return field_type >= HTML_TYPE_TEL &&
180        field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX;
181  }
182
183  return false;
184}
185
186// Returns the Chrome Autofill-supported field type corresponding to the given
187// |autocomplete_attribute_value|, if there is one, in the context of the given
188// |field|.  Chrome Autofill supports a subset of the field types listed at
189// http://is.gd/whatwg_autocomplete
190HtmlFieldType FieldTypeFromAutocompleteAttributeValue(
191    const std::string& autocomplete_attribute_value,
192    const AutofillField& field) {
193  if (autocomplete_attribute_value == "name")
194    return HTML_TYPE_NAME;
195
196  if (autocomplete_attribute_value == "given-name")
197    return HTML_TYPE_GIVEN_NAME;
198
199  if (autocomplete_attribute_value == "additional-name") {
200    if (field.max_length == 1)
201      return HTML_TYPE_ADDITIONAL_NAME_INITIAL;
202    else
203      return HTML_TYPE_ADDITIONAL_NAME;
204  }
205
206  if (autocomplete_attribute_value == "family-name")
207    return HTML_TYPE_FAMILY_NAME;
208
209  if (autocomplete_attribute_value == "organization")
210    return HTML_TYPE_ORGANIZATION;
211
212  if (autocomplete_attribute_value == "street-address")
213    return HTML_TYPE_STREET_ADDRESS;
214
215  if (autocomplete_attribute_value == "address-line1")
216    return HTML_TYPE_ADDRESS_LINE1;
217
218  if (autocomplete_attribute_value == "address-line2")
219    return HTML_TYPE_ADDRESS_LINE2;
220
221  if (autocomplete_attribute_value == "locality")
222    return HTML_TYPE_LOCALITY;
223
224  if (autocomplete_attribute_value == "region")
225    return HTML_TYPE_REGION;
226
227  if (autocomplete_attribute_value == "country")
228    return HTML_TYPE_COUNTRY_CODE;
229
230  if (autocomplete_attribute_value == "country-name")
231    return HTML_TYPE_COUNTRY_NAME;
232
233  if (autocomplete_attribute_value == "postal-code")
234    return HTML_TYPE_POSTAL_CODE;
235
236  if (autocomplete_attribute_value == "cc-name")
237    return HTML_TYPE_CREDIT_CARD_NAME;
238
239  if (autocomplete_attribute_value == "cc-number")
240    return HTML_TYPE_CREDIT_CARD_NUMBER;
241
242  if (autocomplete_attribute_value == "cc-exp") {
243    if (field.max_length == 5)
244      return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
245    else if (field.max_length == 7)
246      return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
247    else
248      return HTML_TYPE_CREDIT_CARD_EXP;
249  }
250
251  if (autocomplete_attribute_value == "cc-exp-month")
252    return HTML_TYPE_CREDIT_CARD_EXP_MONTH;
253
254  if (autocomplete_attribute_value == "cc-exp-year") {
255    if (field.max_length == 2)
256      return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR;
257    else if (field.max_length == 4)
258      return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR;
259    else
260      return HTML_TYPE_CREDIT_CARD_EXP_YEAR;
261  }
262
263  if (autocomplete_attribute_value == "cc-csc")
264    return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE;
265
266  if (autocomplete_attribute_value == "cc-type")
267    return HTML_TYPE_CREDIT_CARD_TYPE;
268
269  if (autocomplete_attribute_value == "tel")
270    return HTML_TYPE_TEL;
271
272  if (autocomplete_attribute_value == "tel-country-code")
273    return HTML_TYPE_TEL_COUNTRY_CODE;
274
275  if (autocomplete_attribute_value == "tel-national")
276    return HTML_TYPE_TEL_NATIONAL;
277
278  if (autocomplete_attribute_value == "tel-area-code")
279    return HTML_TYPE_TEL_AREA_CODE;
280
281  if (autocomplete_attribute_value == "tel-local")
282    return HTML_TYPE_TEL_LOCAL;
283
284  if (autocomplete_attribute_value == "tel-local-prefix")
285    return HTML_TYPE_TEL_LOCAL_PREFIX;
286
287  if (autocomplete_attribute_value == "tel-local-suffix")
288    return HTML_TYPE_TEL_LOCAL_SUFFIX;
289
290  if (autocomplete_attribute_value == "email")
291    return HTML_TYPE_EMAIL;
292
293  return HTML_TYPE_UNKNOWN;
294}
295
296std::string StripDigitsIfRequired(const base::string16& input) {
297  UErrorCode status = U_ZERO_ERROR;
298  CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern,
299                         (kIgnorePatternInFieldName));
300  CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher,
301                         (icu_pattern, UREGEX_CASE_INSENSITIVE, status));
302  DCHECK_EQ(status, U_ZERO_ERROR);
303
304  icu::UnicodeString icu_input(input.data(), input.length());
305  matcher.reset(icu_input);
306
307  icu::UnicodeString replaced_string = matcher.replaceAll("", status);
308
309  std::string return_string;
310  status = U_ZERO_ERROR;
311  UTF16ToUTF8(replaced_string.getBuffer(),
312              static_cast<size_t>(replaced_string.length()),
313              &return_string);
314  if (status != U_ZERO_ERROR) {
315    DVLOG(1) << "Couldn't strip digits in " << UTF16ToUTF8(input);
316    return UTF16ToUTF8(input);
317  }
318
319  return return_string;
320}
321
322}  // namespace
323
324FormStructure::FormStructure(const FormData& form)
325    : form_name_(form.name),
326      source_url_(form.origin),
327      target_url_(form.action),
328      autofill_count_(0),
329      active_field_count_(0),
330      upload_required_(USE_UPLOAD_RATES),
331      server_experiment_id_("no server response"),
332      has_author_specified_types_(false) {
333  // Copy the form fields.
334  std::map<base::string16, size_t> unique_names;
335  for (std::vector<FormFieldData>::const_iterator field =
336           form.fields.begin();
337       field != form.fields.end(); ++field) {
338    if (!ShouldSkipField(*field)) {
339      // Add all supported form fields (including with empty names) to the
340      // signature.  This is a requirement for Autofill servers.
341      form_signature_field_names_.append("&");
342      form_signature_field_names_.append(StripDigitsIfRequired(field->name));
343
344      ++active_field_count_;
345    }
346
347    // Generate a unique name for this field by appending a counter to the name.
348    // Make sure to prepend the counter with a non-numeric digit so that we are
349    // guaranteed to avoid collisions.
350    if (!unique_names.count(field->name))
351      unique_names[field->name] = 1;
352    else
353      ++unique_names[field->name];
354    base::string16 unique_name = field->name + ASCIIToUTF16("_") +
355        base::IntToString16(unique_names[field->name]);
356    fields_.push_back(new AutofillField(*field, unique_name));
357  }
358
359  std::string method = UTF16ToUTF8(form.method);
360  if (StringToLowerASCII(method) == kFormMethodPost) {
361    method_ = POST;
362  } else {
363    // Either the method is 'get', or we don't know.  In this case we default
364    // to GET.
365    method_ = GET;
366  }
367}
368
369FormStructure::~FormStructure() {}
370
371void FormStructure::DetermineHeuristicTypes(
372    const AutofillMetrics& metric_logger) {
373  // First, try to detect field types based on each field's |autocomplete|
374  // attribute value.  If there is at least one form field that specifies an
375  // autocomplete type hint, don't try to apply other heuristics to match fields
376  // in this form.
377  bool has_author_specified_sections;
378  ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_,
379                                            &has_author_specified_sections);
380
381  if (!has_author_specified_types_) {
382    ServerFieldTypeMap field_type_map;
383    FormField::ParseFormFields(fields_.get(), &field_type_map);
384    for (size_t i = 0; i < field_count(); ++i) {
385      AutofillField* field = fields_[i];
386      ServerFieldTypeMap::iterator iter =
387          field_type_map.find(field->unique_name());
388      if (iter != field_type_map.end())
389        field->set_heuristic_type(iter->second);
390    }
391  }
392
393  UpdateAutofillCount();
394  IdentifySections(has_author_specified_sections);
395
396  if (IsAutofillable(true)) {
397    metric_logger.LogDeveloperEngagementMetric(
398        AutofillMetrics::FILLABLE_FORM_PARSED);
399    if (has_author_specified_types_) {
400      metric_logger.LogDeveloperEngagementMetric(
401          AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS);
402    }
403  }
404}
405
406bool FormStructure::EncodeUploadRequest(
407    const ServerFieldTypeSet& available_field_types,
408    bool form_was_autofilled,
409    std::string* encoded_xml) const {
410  DCHECK(ShouldBeCrowdsourced());
411
412  // Verify that |available_field_types| agrees with the possible field types we
413  // are uploading.
414  for (std::vector<AutofillField*>::const_iterator field = begin();
415       field != end();
416       ++field) {
417    for (ServerFieldTypeSet::const_iterator type =
418             (*field)->possible_types().begin();
419         type != (*field)->possible_types().end();
420         ++type) {
421      DCHECK(*type == UNKNOWN_TYPE ||
422             *type == EMPTY_TYPE ||
423             available_field_types.count(*type));
424    }
425  }
426
427  // Set up the <autofillupload> element and its attributes.
428  buzz::XmlElement autofill_request_xml(
429      (buzz::QName(kXMLElementAutofillUpload)));
430  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
431                               kClientVersion);
432  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
433                               FormSignature());
434  autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
435                               form_was_autofilled ? "true" : "false");
436  autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
437                               EncodeFieldTypes(available_field_types).c_str());
438
439  if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
440    return false;  // Malformed form, skip it.
441
442  // Obtain the XML structure as a string.
443  *encoded_xml = kXMLDeclaration;
444  *encoded_xml += autofill_request_xml.Str().c_str();
445
446  // To enable this logging, run with the flag --vmodule="form_structure=2".
447  VLOG(2) << "\n" << *encoded_xml;
448
449  return true;
450}
451
452bool FormStructure::EncodeFieldAssignments(
453    const ServerFieldTypeSet& available_field_types,
454    std::string* encoded_xml) const {
455  DCHECK(ShouldBeCrowdsourced());
456
457  // Set up the <fieldassignments> element and its attributes.
458  buzz::XmlElement autofill_request_xml(
459      (buzz::QName(kXMLElementFieldAssignments)));
460  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
461                               FormSignature());
462
463  if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS,
464                         &autofill_request_xml))
465    return false;  // Malformed form, skip it.
466
467  // Obtain the XML structure as a string.
468  *encoded_xml = kXMLDeclaration;
469  *encoded_xml += autofill_request_xml.Str().c_str();
470
471  return true;
472}
473
474// static
475bool FormStructure::EncodeQueryRequest(
476    const std::vector<FormStructure*>& forms,
477    std::vector<std::string>* encoded_signatures,
478    std::string* encoded_xml) {
479  DCHECK(encoded_signatures);
480  DCHECK(encoded_xml);
481  encoded_xml->clear();
482  encoded_signatures->clear();
483  encoded_signatures->reserve(forms.size());
484
485  // Set up the <autofillquery> element and attributes.
486  buzz::XmlElement autofill_request_xml(
487      (buzz::QName(kXMLElementAutofillQuery)));
488  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
489                               kClientVersion);
490
491  // Some badly formatted web sites repeat forms - detect that and encode only
492  // one form as returned data would be the same for all the repeated forms.
493  std::set<std::string> processed_forms;
494  for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
495       it != forms.end();
496       ++it) {
497    std::string signature((*it)->FormSignature());
498    if (processed_forms.find(signature) != processed_forms.end())
499      continue;
500    processed_forms.insert(signature);
501    scoped_ptr<buzz::XmlElement> encompassing_xml_element(
502        new buzz::XmlElement(buzz::QName(kXMLElementForm)));
503    encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
504                                      signature);
505
506    if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
507                                  encompassing_xml_element.get()))
508      continue;  // Malformed form, skip it.
509
510    autofill_request_xml.AddElement(encompassing_xml_element.release());
511    encoded_signatures->push_back(signature);
512  }
513
514  if (!encoded_signatures->size())
515    return false;
516
517  autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
518                               kAcceptedFeaturesExperiment);
519
520  // Obtain the XML structure as a string.
521  *encoded_xml = kXMLDeclaration;
522  *encoded_xml += autofill_request_xml.Str().c_str();
523
524  return true;
525}
526
527// static
528void FormStructure::ParseQueryResponse(
529    const std::string& response_xml,
530    const std::vector<FormStructure*>& forms,
531    const AutofillMetrics& metric_logger) {
532  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED);
533
534  // Parse the field types from the server response to the query.
535  std::vector<AutofillServerFieldInfo> field_infos;
536  UploadRequired upload_required;
537  std::string experiment_id;
538  AutofillQueryXmlParser parse_handler(&field_infos,
539                                       &upload_required,
540                                       &experiment_id);
541  buzz::XmlParser parser(&parse_handler);
542  parser.Parse(response_xml.c_str(), response_xml.length(), true);
543  if (!parse_handler.succeeded())
544    return;
545
546  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
547  metric_logger.LogServerExperimentIdForQuery(experiment_id);
548
549  bool heuristics_detected_fillable_field = false;
550  bool query_response_overrode_heuristics = false;
551
552  // Copy the field types into the actual form.
553  std::vector<AutofillServerFieldInfo>::iterator current_info =
554      field_infos.begin();
555  for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
556       iter != forms.end(); ++iter) {
557    FormStructure* form = *iter;
558    form->upload_required_ = upload_required;
559    form->server_experiment_id_ = experiment_id;
560
561    for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
562         field != form->fields_.end(); ++field) {
563      if (form->ShouldSkipField(**field))
564        continue;
565
566      // In some cases *successful* response does not return all the fields.
567      // Quit the update of the types then.
568      if (current_info == field_infos.end())
569        break;
570
571      // UNKNOWN_TYPE is reserved for use by the client.
572      DCHECK_NE(current_info->field_type, UNKNOWN_TYPE);
573
574      ServerFieldType heuristic_type = (*field)->heuristic_type();
575      if (heuristic_type != UNKNOWN_TYPE)
576        heuristics_detected_fillable_field = true;
577
578      (*field)->set_server_type(current_info->field_type);
579      if (heuristic_type != (*field)->Type().GetStorableType())
580        query_response_overrode_heuristics = true;
581
582      // Copy default value into the field if available.
583      if (!current_info->default_value.empty())
584        (*field)->set_default_value(current_info->default_value);
585
586      ++current_info;
587    }
588
589    form->UpdateAutofillCount();
590    form->IdentifySections(false);
591  }
592
593  AutofillMetrics::ServerQueryMetric metric;
594  if (query_response_overrode_heuristics) {
595    if (heuristics_detected_fillable_field) {
596      metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
597    } else {
598      metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
599    }
600  } else {
601    metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
602  }
603  metric_logger.LogServerQueryMetric(metric);
604}
605
606// static
607void FormStructure::GetFieldTypePredictions(
608    const std::vector<FormStructure*>& form_structures,
609    std::vector<FormDataPredictions>* forms) {
610  forms->clear();
611  forms->reserve(form_structures.size());
612  for (size_t i = 0; i < form_structures.size(); ++i) {
613    FormStructure* form_structure = form_structures[i];
614    FormDataPredictions form;
615    form.data.name = form_structure->form_name_;
616    form.data.method =
617        ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET");
618    form.data.origin = form_structure->source_url_;
619    form.data.action = form_structure->target_url_;
620    form.signature = form_structure->FormSignature();
621    form.experiment_id = form_structure->server_experiment_id_;
622
623    for (std::vector<AutofillField*>::const_iterator field =
624             form_structure->fields_.begin();
625         field != form_structure->fields_.end(); ++field) {
626      form.data.fields.push_back(FormFieldData(**field));
627
628      FormFieldDataPredictions annotated_field;
629      annotated_field.signature = (*field)->FieldSignature();
630      annotated_field.heuristic_type =
631          AutofillType((*field)->heuristic_type()).ToString();
632      annotated_field.server_type =
633          AutofillType((*field)->server_type()).ToString();
634      annotated_field.overall_type = (*field)->Type().ToString();
635      form.fields.push_back(annotated_field);
636    }
637
638    forms->push_back(form);
639  }
640}
641
642std::string FormStructure::FormSignature() const {
643  std::string scheme(target_url_.scheme());
644  std::string host(target_url_.host());
645
646  // If target host or scheme is empty, set scheme and host of source url.
647  // This is done to match the Toolbar's behavior.
648  if (scheme.empty() || host.empty()) {
649    scheme = source_url_.scheme();
650    host = source_url_.host();
651  }
652
653  std::string form_string = scheme + "://" + host + "&" +
654                            UTF16ToUTF8(form_name_) +
655                            form_signature_field_names_;
656
657  return Hash64Bit(form_string);
658}
659
660bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
661  return field.is_checkable;
662}
663
664bool FormStructure::IsAutofillable(bool require_method_post) const {
665  if (autofill_count() < kRequiredAutofillFields)
666    return false;
667
668  return ShouldBeParsed(require_method_post);
669}
670
671void FormStructure::UpdateAutofillCount() {
672  autofill_count_ = 0;
673  for (std::vector<AutofillField*>::const_iterator iter = begin();
674       iter != end(); ++iter) {
675    AutofillField* field = *iter;
676    if (field && field->IsFieldFillable())
677      ++autofill_count_;
678  }
679}
680
681bool FormStructure::ShouldBeParsed(bool require_method_post) const {
682  if (active_field_count() < kRequiredAutofillFields)
683    return false;
684
685  // Rule out http(s)://*/search?...
686  //  e.g. http://www.google.com/search?q=...
687  //       http://search.yahoo.com/search?p=...
688  if (target_url_.path() == "/search")
689    return false;
690
691  bool has_text_field = false;
692  for (std::vector<AutofillField*>::const_iterator it = begin();
693       it != end() && !has_text_field; ++it) {
694    has_text_field |= (*it)->form_control_type != "select-one";
695  }
696  if (!has_text_field)
697    return false;
698
699  return !require_method_post || (method_ == POST);
700}
701
702bool FormStructure::ShouldBeCrowdsourced() const {
703  return !has_author_specified_types_ && ShouldBeParsed(true);
704}
705
706void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
707  // Map from field signatures to cached fields.
708  std::map<std::string, const AutofillField*> cached_fields;
709  for (size_t i = 0; i < cached_form.field_count(); ++i) {
710    const AutofillField* field = cached_form.field(i);
711    cached_fields[field->FieldSignature()] = field;
712  }
713
714  for (std::vector<AutofillField*>::const_iterator iter = begin();
715       iter != end(); ++iter) {
716    AutofillField* field = *iter;
717
718    std::map<std::string, const AutofillField*>::const_iterator
719        cached_field = cached_fields.find(field->FieldSignature());
720    if (cached_field != cached_fields.end()) {
721      if (field->form_control_type != "select-one" &&
722          field->value == cached_field->second->value) {
723        // From the perspective of learning user data, text fields containing
724        // default values are equivalent to empty fields.
725        field->value = base::string16();
726      }
727
728      field->set_heuristic_type(cached_field->second->heuristic_type());
729      field->set_server_type(cached_field->second->server_type());
730    }
731  }
732
733  UpdateAutofillCount();
734
735  server_experiment_id_ = cached_form.server_experiment_id();
736
737  // The form signature should match between query and upload requests to the
738  // server. On many websites, form elements are dynamically added, removed, or
739  // rearranged via JavaScript between page load and form submission, so we
740  // copy over the |form_signature_field_names_| corresponding to the query
741  // request.
742  DCHECK_EQ(cached_form.form_name_, form_name_);
743  DCHECK_EQ(cached_form.source_url_, source_url_);
744  DCHECK_EQ(cached_form.target_url_, target_url_);
745  form_signature_field_names_ = cached_form.form_signature_field_names_;
746}
747
748void FormStructure::LogQualityMetrics(
749    const AutofillMetrics& metric_logger,
750    const base::TimeTicks& load_time,
751    const base::TimeTicks& interaction_time,
752    const base::TimeTicks& submission_time) const {
753  std::string experiment_id = server_experiment_id();
754  metric_logger.LogServerExperimentIdForUpload(experiment_id);
755
756  size_t num_detected_field_types = 0;
757  bool did_autofill_all_possible_fields = true;
758  bool did_autofill_some_possible_fields = false;
759  for (size_t i = 0; i < field_count(); ++i) {
760    const AutofillField* field = this->field(i);
761    metric_logger.LogQualityMetric(AutofillMetrics::FIELD_SUBMITTED,
762                                   experiment_id);
763
764    // No further logging for empty fields nor for fields where the entered data
765    // does not appear to already exist in the user's stored Autofill data.
766    const ServerFieldTypeSet& field_types = field->possible_types();
767    DCHECK(!field_types.empty());
768    if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
769      continue;
770
771    ++num_detected_field_types;
772    if (field->is_autofilled)
773      did_autofill_some_possible_fields = true;
774    else
775      did_autofill_all_possible_fields = false;
776
777    // Collapse field types that Chrome treats as identical, e.g. home and
778    // billing address fields.
779    ServerFieldTypeSet collapsed_field_types;
780    for (ServerFieldTypeSet::const_iterator it = field_types.begin();
781         it != field_types.end();
782         ++it) {
783      // Since we currently only support US phone numbers, the (city code + main
784      // digits) number is almost always identical to the whole phone number.
785      // TODO(isherman): Improve this logic once we add support for
786      // international numbers.
787      if (*it == PHONE_HOME_CITY_AND_NUMBER)
788        collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
789      else
790        collapsed_field_types.insert(AutofillType(*it).GetStorableType());
791    }
792
793    // Capture the field's type, if it is unambiguous.
794    ServerFieldType field_type = UNKNOWN_TYPE;
795    if (collapsed_field_types.size() == 1)
796      field_type = *collapsed_field_types.begin();
797
798    ServerFieldType heuristic_type =
799        AutofillType(field->heuristic_type()).GetStorableType();
800    ServerFieldType server_type =
801        AutofillType(field->server_type()).GetStorableType();
802    ServerFieldType predicted_type = field->Type().GetStorableType();
803
804    // Log heuristic, server, and overall type quality metrics, independently of
805    // whether the field was autofilled.
806    if (heuristic_type == UNKNOWN_TYPE) {
807      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
808                                               field_type, experiment_id);
809    } else if (field_types.count(heuristic_type)) {
810      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH,
811                                               field_type, experiment_id);
812    } else {
813      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH,
814                                               field_type, experiment_id);
815    }
816
817    if (server_type == NO_SERVER_DATA) {
818      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
819                                            field_type, experiment_id);
820    } else if (field_types.count(server_type)) {
821      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH,
822                                            field_type, experiment_id);
823    } else {
824      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH,
825                                            field_type, experiment_id);
826    }
827
828    if (predicted_type == UNKNOWN_TYPE) {
829      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
830                                             field_type, experiment_id);
831    } else if (field_types.count(predicted_type)) {
832      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH,
833                                             field_type, experiment_id);
834    } else {
835      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH,
836                                             field_type, experiment_id);
837    }
838
839    // TODO(isherman): <select> fields don't support |is_autofilled()|, so we
840    // have to skip them for the remaining metrics.
841    if (field->form_control_type == "select-one")
842      continue;
843
844    if (field->is_autofilled) {
845      metric_logger.LogQualityMetric(AutofillMetrics::FIELD_AUTOFILLED,
846                                     experiment_id);
847    } else {
848      metric_logger.LogQualityMetric(AutofillMetrics::FIELD_NOT_AUTOFILLED,
849                                     experiment_id);
850
851      if (heuristic_type == UNKNOWN_TYPE) {
852        metric_logger.LogQualityMetric(
853            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN,
854            experiment_id);
855      } else if (field_types.count(heuristic_type)) {
856        metric_logger.LogQualityMetric(
857            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH,
858            experiment_id);
859      } else {
860        metric_logger.LogQualityMetric(
861            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH,
862            experiment_id);
863      }
864
865      if (server_type == NO_SERVER_DATA) {
866        metric_logger.LogQualityMetric(
867            AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN,
868            experiment_id);
869      } else if (field_types.count(server_type)) {
870        metric_logger.LogQualityMetric(
871            AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH,
872            experiment_id);
873      } else {
874        metric_logger.LogQualityMetric(
875            AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH,
876            experiment_id);
877      }
878    }
879  }
880
881  if (num_detected_field_types < kRequiredAutofillFields) {
882    metric_logger.LogUserHappinessMetric(
883        AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM);
884  } else {
885    if (did_autofill_all_possible_fields) {
886      metric_logger.LogUserHappinessMetric(
887          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL);
888    } else if (did_autofill_some_possible_fields) {
889      metric_logger.LogUserHappinessMetric(
890          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME);
891    } else {
892      metric_logger.LogUserHappinessMetric(
893          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE);
894    }
895
896    // Unlike the other times, the |submission_time| should always be available.
897    DCHECK(!submission_time.is_null());
898
899    // The |load_time| might be unset, in the case that the form was dynamically
900    // added to the DOM.
901    if (!load_time.is_null()) {
902      // Submission should always chronologically follow form load.
903      DCHECK(submission_time > load_time);
904      base::TimeDelta elapsed = submission_time - load_time;
905      if (did_autofill_some_possible_fields)
906        metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed);
907      else
908        metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed);
909    }
910
911    // The |interaction_time| might be unset, in the case that the user
912    // submitted a blank form.
913    if (!interaction_time.is_null()) {
914      // Submission should always chronologically follow interaction.
915      DCHECK(submission_time > interaction_time);
916      base::TimeDelta elapsed = submission_time - interaction_time;
917      if (did_autofill_some_possible_fields) {
918        metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed);
919      } else {
920        metric_logger.LogFormFillDurationFromInteractionWithoutAutofill(
921            elapsed);
922      }
923    }
924  }
925}
926
927const AutofillField* FormStructure::field(size_t index) const {
928  if (index >= fields_.size()) {
929    NOTREACHED();
930    return NULL;
931  }
932
933  return fields_[index];
934}
935
936AutofillField* FormStructure::field(size_t index) {
937  return const_cast<AutofillField*>(
938      static_cast<const FormStructure*>(this)->field(index));
939}
940
941size_t FormStructure::field_count() const {
942  return fields_.size();
943}
944
945size_t FormStructure::active_field_count() const {
946  return active_field_count_;
947}
948
949std::string FormStructure::server_experiment_id() const {
950  return server_experiment_id_;
951}
952
953FormData FormStructure::ToFormData() const {
954  // |data.user_submitted| will always be false.
955  FormData data;
956  data.name = form_name_;
957  data.origin = source_url_;
958  data.action = target_url_;
959  data.method = ASCIIToUTF16(method_ == POST ? "POST" : "GET");
960
961  for (size_t i = 0; i < fields_.size(); ++i) {
962    data.fields.push_back(FormFieldData(*fields_[i]));
963  }
964
965  return data;
966}
967
968bool FormStructure::operator==(const FormData& form) const {
969  // TODO(jhawkins): Is this enough to differentiate a form?
970  if (form_name_ == form.name &&
971      source_url_ == form.origin &&
972      target_url_ == form.action) {
973    return true;
974  }
975
976  // TODO(jhawkins): Compare field names, IDs and labels once we have labels
977  // set up.
978
979  return false;
980}
981
982bool FormStructure::operator!=(const FormData& form) const {
983  return !operator==(form);
984}
985
986std::string FormStructure::Hash64Bit(const std::string& str) {
987  std::string hash_bin = base::SHA1HashString(str);
988  DCHECK_EQ(20U, hash_bin.length());
989
990  uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
991                  (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
992                  (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
993                  (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
994                  (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
995                  (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
996                  (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
997                   ((static_cast<uint64>(hash_bin[7])) & 0xFF);
998
999  return base::Uint64ToString(hash64);
1000}
1001
1002bool FormStructure::EncodeFormRequest(
1003    FormStructure::EncodeRequestType request_type,
1004    buzz::XmlElement* encompassing_xml_element) const {
1005  if (!field_count())  // Nothing to add.
1006    return false;
1007
1008  // Some badly formatted web sites repeat fields - limit number of fields to
1009  // 48, which is far larger than any valid form and XML still fits into 2K.
1010  // Do not send requests for forms with more than this many fields, as they are
1011  // near certainly not valid/auto-fillable.
1012  const size_t kMaxFieldsOnTheForm = 48;
1013  if (field_count() > kMaxFieldsOnTheForm)
1014    return false;
1015
1016  // Add the child nodes for the form fields.
1017  for (size_t index = 0; index < field_count(); ++index) {
1018    const AutofillField* field = fields_[index];
1019    switch (request_type) {
1020      case FormStructure::UPLOAD:
1021        EncodeFieldForUpload(*field, encompassing_xml_element);
1022        break;
1023      case FormStructure::QUERY:
1024        if (ShouldSkipField(*field))
1025          continue;
1026        EncodeFieldForQuery(*field, encompassing_xml_element);
1027        break;
1028      case FormStructure::FIELD_ASSIGNMENTS:
1029        EncodeFieldForFieldAssignments(*field, encompassing_xml_element);
1030        break;
1031    }
1032  }
1033  return true;
1034}
1035
1036void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
1037    bool* found_types,
1038    bool* found_sections) {
1039  const std::string kDefaultSection = "-default";
1040
1041  *found_types = false;
1042  *found_sections = false;
1043  for (std::vector<AutofillField*>::iterator it = fields_.begin();
1044       it != fields_.end(); ++it) {
1045    AutofillField* field = *it;
1046
1047    // To prevent potential section name collisions, add a default suffix for
1048    // other fields.  Without this, 'autocomplete' attribute values
1049    // "section--shipping street-address" and "shipping street-address" would be
1050    // parsed identically, given the section handling code below.  We do this
1051    // before any validation so that fields with invalid attributes still end up
1052    // in the default section.  These default section names will be overridden
1053    // by subsequent heuristic parsing steps if there are no author-specified
1054    // section names.
1055    field->set_section(kDefaultSection);
1056
1057    // Canonicalize the attribute value by trimming whitespace, collapsing
1058    // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1059    std::string autocomplete_attribute =
1060        CollapseWhitespaceASCII(field->autocomplete_attribute, false);
1061    autocomplete_attribute = StringToLowerASCII(autocomplete_attribute);
1062
1063    // The autocomplete attribute is overloaded: it can specify either a field
1064    // type hint or whether autocomplete should be enabled at all.  Ignore the
1065    // latter type of attribute value.
1066    if (autocomplete_attribute.empty() ||
1067        autocomplete_attribute == "on" ||
1068        autocomplete_attribute == "off") {
1069      continue;
1070    }
1071
1072    // Any other value, even it is invalid, is considered to be a type hint.
1073    // This allows a website's author to specify an attribute like
1074    // autocomplete="other" on a field to disable all Autofill heuristics for
1075    // the form.
1076    *found_types = true;
1077
1078    // Tokenize the attribute value.  Per the spec, the tokens are parsed in
1079    // reverse order.
1080    std::vector<std::string> tokens;
1081    Tokenize(autocomplete_attribute, " ", &tokens);
1082
1083    // The final token must be the field type.
1084    // If it is not one of the known types, abort.
1085    DCHECK(!tokens.empty());
1086    std::string field_type_token = tokens.back();
1087    tokens.pop_back();
1088    HtmlFieldType field_type =
1089        FieldTypeFromAutocompleteAttributeValue(field_type_token, *field);
1090    if (field_type == HTML_TYPE_UNKNOWN)
1091      continue;
1092
1093    // The preceding token, if any, may be a type hint.
1094    if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1095      // If it is, it must match the field type; otherwise, abort.
1096      // Note that an invalid token invalidates the entire attribute value, even
1097      // if the other tokens are valid.
1098      if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1099        continue;
1100
1101      // Chrome Autofill ignores these type hints.
1102      tokens.pop_back();
1103    }
1104
1105    // The preceding token, if any, may be a fixed string that is either
1106    // "shipping" or "billing".  Chrome Autofill treats these as implicit
1107    // section name suffixes.
1108    DCHECK_EQ(kDefaultSection, field->section());
1109    std::string section = field->section();
1110    HtmlFieldMode mode = HTML_MODE_NONE;
1111    if (!tokens.empty()) {
1112      if (tokens.back() == kShippingMode)
1113        mode = HTML_MODE_SHIPPING;
1114      else if (tokens.back() == kBillingMode)
1115        mode = HTML_MODE_BILLING;
1116    }
1117
1118    if (mode != HTML_MODE_NONE) {
1119      section = "-" + tokens.back();
1120      tokens.pop_back();
1121    }
1122
1123    // The preceding token, if any, may be a named section.
1124    const std::string kSectionPrefix = "section-";
1125    if (!tokens.empty() &&
1126        StartsWithASCII(tokens.back(), kSectionPrefix, true)) {
1127      // Prepend this section name to the suffix set in the preceding block.
1128      section = tokens.back().substr(kSectionPrefix.size()) + section;
1129      tokens.pop_back();
1130    }
1131
1132    // No other tokens are allowed.  If there are any remaining, abort.
1133    if (!tokens.empty())
1134      continue;
1135
1136    if (section != kDefaultSection) {
1137      *found_sections = true;
1138      field->set_section(section);
1139    }
1140
1141    // No errors encountered while parsing!
1142    // Update the |field|'s type based on what was parsed from the attribute.
1143    field->SetHtmlType(field_type, mode);
1144  }
1145}
1146
1147void FormStructure::IdentifySections(bool has_author_specified_sections) {
1148  if (fields_.empty())
1149    return;
1150
1151  if (!has_author_specified_sections) {
1152    // Name sections after the first field in the section.
1153    base::string16 current_section = fields_.front()->unique_name();
1154
1155    // Keep track of the types we've seen in this section.
1156    std::set<ServerFieldType> seen_types;
1157    ServerFieldType previous_type = UNKNOWN_TYPE;
1158
1159    for (std::vector<AutofillField*>::iterator field = fields_.begin();
1160         field != fields_.end(); ++field) {
1161      const ServerFieldType current_type = (*field)->Type().GetStorableType();
1162
1163      bool already_saw_current_type = seen_types.count(current_type) > 0;
1164
1165      // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1166      // evening phone number.  Our phone number detection is also generally a
1167      // little off.  Hence, ignore this field type as a signal here.
1168      if (AutofillType(current_type).group() == PHONE_HOME)
1169        already_saw_current_type = false;
1170
1171      // Some forms have adjacent fields of the same type.  Two common examples:
1172      //  * Forms with two email fields, where the second is meant to "confirm"
1173      //    the first.
1174      //  * Forms with a <select> menu for states in some countries, and a
1175      //    freeform <input> field for states in other countries.  (Usually,
1176      //    only one of these two will be visible for any given choice of
1177      //    country.)
1178      // Generally, adjacent fields of the same type belong in the same logical
1179      // section.
1180      if (current_type == previous_type)
1181        already_saw_current_type = false;
1182
1183      previous_type = current_type;
1184
1185      if (current_type != UNKNOWN_TYPE && already_saw_current_type) {
1186        // We reached the end of a section, so start a new section.
1187        seen_types.clear();
1188        current_section = (*field)->unique_name();
1189      }
1190
1191      seen_types.insert(current_type);
1192      (*field)->set_section(UTF16ToUTF8(current_section));
1193    }
1194  }
1195
1196  // Ensure that credit card and address fields are in separate sections.
1197  // This simplifies the section-aware logic in autofill_manager.cc.
1198  for (std::vector<AutofillField*>::iterator field = fields_.begin();
1199       field != fields_.end(); ++field) {
1200    FieldTypeGroup field_type_group = (*field)->Type().group();
1201    if (field_type_group == CREDIT_CARD)
1202      (*field)->set_section((*field)->section() + "-cc");
1203    else
1204      (*field)->set_section((*field)->section() + "-default");
1205  }
1206}
1207
1208}  // namespace autofill
1209