form_structure.cc revision f8ee788a64d60abd8f2d742a5fdedde054ecd910
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/core/browser/form_structure.h"
6
7#include <utility>
8
9#include "base/basictypes.h"
10#include "base/command_line.h"
11#include "base/i18n/case_conversion.h"
12#include "base/logging.h"
13#include "base/memory/scoped_ptr.h"
14#include "base/sha1.h"
15#include "base/strings/string_number_conversions.h"
16#include "base/strings/string_util.h"
17#include "base/strings/stringprintf.h"
18#include "base/strings/utf_string_conversions.h"
19#include "base/time/time.h"
20#include "components/autofill/core/browser/autofill_metrics.h"
21#include "components/autofill/core/browser/autofill_type.h"
22#include "components/autofill/core/browser/autofill_xml_parser.h"
23#include "components/autofill/core/browser/field_types.h"
24#include "components/autofill/core/browser/form_field.h"
25#include "components/autofill/core/common/autofill_constants.h"
26#include "components/autofill/core/common/form_data.h"
27#include "components/autofill/core/common/form_data_predictions.h"
28#include "components/autofill/core/common/form_field_data.h"
29#include "components/autofill/core/common/form_field_data_predictions.h"
30#include "third_party/icu/source/i18n/unicode/regex.h"
31#include "third_party/libjingle/source/talk/xmllite/xmlelement.h"
32
33namespace autofill {
34namespace {
35
// Lower-case form method value that the FormStructure constructor treats as
// POST; any other method value is treated as GET.
const char kFormMethodPost[] = "post";

// XML elements and attributes used when encoding query, upload and field
// assignment requests.
const char kAttributeAutofillUsed[] = "autofillused";
const char kAttributeAutofillType[] = "autofilltype";
const char kAttributeClientVersion[] = "clientversion";
const char kAttributeDataPresent[] = "datapresent";
const char kAttributeFieldID[] = "fieldid";
const char kAttributeFieldType[] = "fieldtype";
const char kAttributeFormSignature[] = "formsignature";
const char kAttributeName[] = "name";
const char kAttributeSignature[] = "signature";
// Value sent in the |kAttributeClientVersion| attribute of query and upload
// requests.
const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";
// Prepended to every encoded request.
const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
const char kXMLElementAutofillQuery[] = "autofillquery";
const char kXMLElementAutofillUpload[] = "autofillupload";
const char kXMLElementFieldAssignments[] = "fieldassignments";
const char kXMLElementField[] = "field";
const char kXMLElementFields[] = "fields";
const char kXMLElementForm[] = "form";
// NOTE(review): the two mode strings below are not referenced in this part of
// the file; their meaning should be confirmed at their use sites.
const char kBillingMode[] = "billing";
const char kShippingMode[] = "shipping";

// Strip away >= 5 consecutive digits.  |StripDigitsIfRequired()| removes every
// match of this pattern from a field name before the name is appended to the
// form signature.
const char kIgnorePatternInFieldName[] = "\\d{5,}+";
61
62// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
63// |available_field_types| and returns the hex representation as a string.
64std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) {
65  // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
66  // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
67  const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
68
69  // Pack the types in |available_field_types| into |bit_field|.
70  std::vector<uint8> bit_field(kNumBytes, 0);
71  for (ServerFieldTypeSet::const_iterator field_type =
72           available_field_types.begin();
73       field_type != available_field_types.end();
74       ++field_type) {
75    // Set the appropriate bit in the field.  The bit we set is the one
76    // |field_type| % 8 from the left of the byte.
77    const size_t byte = *field_type / 8;
78    const size_t bit = 0x80 >> (*field_type % 8);
79    DCHECK(byte < bit_field.size());
80    bit_field[byte] |= bit;
81  }
82
83  // Discard any trailing zeroes.
84  // If there are no available types, we return the empty string.
85  size_t data_end = bit_field.size();
86  for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
87  }
88
89  // Print all meaningfull bytes into a string.
90  std::string data_presence;
91  data_presence.reserve(data_end * 2 + 1);
92  for (size_t i = 0; i < data_end; ++i) {
93    base::StringAppendF(&data_presence, "%02x", bit_field[i]);
94  }
95
96  return data_presence;
97}
98
99// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
100// in upload xml, and also add them to the parent XmlElement.
101void EncodeFieldForUpload(const AutofillField& field,
102                          buzz::XmlElement* parent) {
103  // Don't upload checkable fields.
104  if (field.is_checkable)
105    return;
106
107  ServerFieldTypeSet types = field.possible_types();
108  // |types| could be empty in unit-tests only.
109  for (ServerFieldTypeSet::iterator field_type = types.begin();
110       field_type != types.end(); ++field_type) {
111    buzz::XmlElement *field_element = new buzz::XmlElement(
112        buzz::QName(kXMLElementField));
113
114    field_element->SetAttr(buzz::QName(kAttributeSignature),
115                           field.FieldSignature());
116    field_element->SetAttr(buzz::QName(kAttributeAutofillType),
117                           base::IntToString(*field_type));
118    parent->AddElement(field_element);
119  }
120}
121
122// Helper for |EncodeFormRequest()| that creates XmlElement for the given field
123// in query xml, and also add it to the parent XmlElement.
124void EncodeFieldForQuery(const AutofillField& field,
125                         buzz::XmlElement* parent) {
126  buzz::XmlElement *field_element = new buzz::XmlElement(
127      buzz::QName(kXMLElementField));
128  field_element->SetAttr(buzz::QName(kAttributeSignature),
129                         field.FieldSignature());
130  parent->AddElement(field_element);
131}
132
133// Helper for |EncodeFormRequest()| that creates XmlElements for the given field
134// in field assignments xml, and also add them to the parent XmlElement.
135void EncodeFieldForFieldAssignments(const AutofillField& field,
136                                    buzz::XmlElement* parent) {
137  ServerFieldTypeSet types = field.possible_types();
138  for (ServerFieldTypeSet::iterator field_type = types.begin();
139       field_type != types.end(); ++field_type) {
140    buzz::XmlElement *field_element = new buzz::XmlElement(
141        buzz::QName(kXMLElementFields));
142
143    field_element->SetAttr(buzz::QName(kAttributeFieldID),
144                           field.FieldSignature());
145    field_element->SetAttr(buzz::QName(kAttributeFieldType),
146                           base::IntToString(*field_type));
147    field_element->SetAttr(buzz::QName(kAttributeName),
148                           base::UTF16ToUTF8(field.name));
149    parent->AddElement(field_element);
150  }
151}
152
// Returns |true| iff |token| is one of the contact type hints described in the
// implementation section of http://is.gd/whatwg_autocomplete that Chrome
// recognizes.  "fax" and "pager" are deliberately not recognized because
// Chrome cannot fill either kind of information.
bool IsContactTypeHint(const std::string& token) {
  static const char* const kContactTypeHints[] = { "home", "work", "mobile" };
  const size_t kNumHints =
      sizeof(kContactTypeHints) / sizeof(kContactTypeHints[0]);
  for (size_t i = 0; i < kNumHints; ++i) {
    if (token == kContactTypeHints[i])
      return true;
  }
  return false;
}
160
161// Returns |true| iff the |token| is a type hint appropriate for a field of the
162// given |field_type|, as specified in the implementation section of
163// http://is.gd/whatwg_autocomplete
164bool ContactTypeHintMatchesFieldType(const std::string& token,
165                                     HtmlFieldType field_type) {
166  // The "home" and "work" type hints are only appropriate for email and phone
167  // number field types.
168  if (token == "home" || token == "work") {
169    return field_type == HTML_TYPE_EMAIL ||
170        (field_type >= HTML_TYPE_TEL &&
171         field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX);
172  }
173
174  // The "mobile" type hint is only appropriate for phone number field types.
175  // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
176  // support filling either type of information.
177  if (token == "mobile") {
178    return field_type >= HTML_TYPE_TEL &&
179        field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX;
180  }
181
182  return false;
183}
184
185// Returns the Chrome Autofill-supported field type corresponding to the given
186// |autocomplete_attribute_value|, if there is one, in the context of the given
187// |field|.  Chrome Autofill supports a subset of the field types listed at
188// http://is.gd/whatwg_autocomplete
189HtmlFieldType FieldTypeFromAutocompleteAttributeValue(
190    const std::string& autocomplete_attribute_value,
191    const AutofillField& field) {
192  if (autocomplete_attribute_value == "name")
193    return HTML_TYPE_NAME;
194
195  if (autocomplete_attribute_value == "given-name")
196    return HTML_TYPE_GIVEN_NAME;
197
198  if (autocomplete_attribute_value == "additional-name") {
199    if (field.max_length == 1)
200      return HTML_TYPE_ADDITIONAL_NAME_INITIAL;
201    else
202      return HTML_TYPE_ADDITIONAL_NAME;
203  }
204
205  if (autocomplete_attribute_value == "family-name")
206    return HTML_TYPE_FAMILY_NAME;
207
208  if (autocomplete_attribute_value == "organization")
209    return HTML_TYPE_ORGANIZATION;
210
211  if (autocomplete_attribute_value == "street-address")
212    return HTML_TYPE_STREET_ADDRESS;
213
214  if (autocomplete_attribute_value == "address-line1")
215    return HTML_TYPE_ADDRESS_LINE1;
216
217  if (autocomplete_attribute_value == "address-line2")
218    return HTML_TYPE_ADDRESS_LINE2;
219
220  if (autocomplete_attribute_value == "address-line3")
221    return HTML_TYPE_ADDRESS_LINE3;
222
223  // TODO(estade): remove support for "locality" and "region".
224  if (autocomplete_attribute_value == "locality")
225    return HTML_TYPE_ADDRESS_LEVEL2;
226
227  if (autocomplete_attribute_value == "region")
228    return HTML_TYPE_ADDRESS_LEVEL1;
229
230  if (autocomplete_attribute_value == "address-level1")
231    return HTML_TYPE_ADDRESS_LEVEL1;
232
233  if (autocomplete_attribute_value == "address-level2")
234    return HTML_TYPE_ADDRESS_LEVEL2;
235
236  if (autocomplete_attribute_value == "address-level3")
237    return HTML_TYPE_ADDRESS_LEVEL3;
238
239  if (autocomplete_attribute_value == "country")
240    return HTML_TYPE_COUNTRY_CODE;
241
242  if (autocomplete_attribute_value == "country-name")
243    return HTML_TYPE_COUNTRY_NAME;
244
245  if (autocomplete_attribute_value == "postal-code")
246    return HTML_TYPE_POSTAL_CODE;
247
248  // content_switches.h isn't accessible from here, hence we have
249  // to copy the string literal. This should be removed soon anyway.
250  if (autocomplete_attribute_value == "address" &&
251      CommandLine::ForCurrentProcess()->HasSwitch(
252          "enable-experimental-web-platform-features")) {
253    return HTML_TYPE_FULL_ADDRESS;
254  }
255
256  if (autocomplete_attribute_value == "cc-name")
257    return HTML_TYPE_CREDIT_CARD_NAME;
258
259  if (autocomplete_attribute_value == "cc-number")
260    return HTML_TYPE_CREDIT_CARD_NUMBER;
261
262  if (autocomplete_attribute_value == "cc-exp") {
263    if (field.max_length == 5)
264      return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
265    else if (field.max_length == 7)
266      return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
267    else
268      return HTML_TYPE_CREDIT_CARD_EXP;
269  }
270
271  if (autocomplete_attribute_value == "cc-exp-month")
272    return HTML_TYPE_CREDIT_CARD_EXP_MONTH;
273
274  if (autocomplete_attribute_value == "cc-exp-year") {
275    if (field.max_length == 2)
276      return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR;
277    else if (field.max_length == 4)
278      return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR;
279    else
280      return HTML_TYPE_CREDIT_CARD_EXP_YEAR;
281  }
282
283  if (autocomplete_attribute_value == "cc-csc")
284    return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE;
285
286  if (autocomplete_attribute_value == "cc-type")
287    return HTML_TYPE_CREDIT_CARD_TYPE;
288
289  if (autocomplete_attribute_value == "tel")
290    return HTML_TYPE_TEL;
291
292  if (autocomplete_attribute_value == "tel-country-code")
293    return HTML_TYPE_TEL_COUNTRY_CODE;
294
295  if (autocomplete_attribute_value == "tel-national")
296    return HTML_TYPE_TEL_NATIONAL;
297
298  if (autocomplete_attribute_value == "tel-area-code")
299    return HTML_TYPE_TEL_AREA_CODE;
300
301  if (autocomplete_attribute_value == "tel-local")
302    return HTML_TYPE_TEL_LOCAL;
303
304  if (autocomplete_attribute_value == "tel-local-prefix")
305    return HTML_TYPE_TEL_LOCAL_PREFIX;
306
307  if (autocomplete_attribute_value == "tel-local-suffix")
308    return HTML_TYPE_TEL_LOCAL_SUFFIX;
309
310  if (autocomplete_attribute_value == "email")
311    return HTML_TYPE_EMAIL;
312
313  return HTML_TYPE_UNKNOWN;
314}
315
316std::string StripDigitsIfRequired(const base::string16& input) {
317  UErrorCode status = U_ZERO_ERROR;
318  CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern,
319                         (kIgnorePatternInFieldName));
320  CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher,
321                         (icu_pattern, UREGEX_CASE_INSENSITIVE, status));
322  DCHECK_EQ(status, U_ZERO_ERROR);
323
324  icu::UnicodeString icu_input(input.data(), input.length());
325  matcher.reset(icu_input);
326
327  icu::UnicodeString replaced_string = matcher.replaceAll("", status);
328
329  std::string return_string;
330  status = U_ZERO_ERROR;
331  base::UTF16ToUTF8(replaced_string.getBuffer(),
332                    static_cast<size_t>(replaced_string.length()),
333                    &return_string);
334  if (status != U_ZERO_ERROR) {
335    DVLOG(1) << "Couldn't strip digits in " << base::UTF16ToUTF8(input);
336    return base::UTF16ToUTF8(input);
337  }
338
339  return return_string;
340}
341
342}  // namespace
343
344FormStructure::FormStructure(const FormData& form)
345    : form_name_(form.name),
346      source_url_(form.origin),
347      target_url_(form.action),
348      autofill_count_(0),
349      active_field_count_(0),
350      upload_required_(USE_UPLOAD_RATES),
351      has_author_specified_types_(false) {
352  // Copy the form fields.
353  std::map<base::string16, size_t> unique_names;
354  for (std::vector<FormFieldData>::const_iterator field =
355           form.fields.begin();
356       field != form.fields.end(); ++field) {
357    if (!ShouldSkipField(*field)) {
358      // Add all supported form fields (including with empty names) to the
359      // signature.  This is a requirement for Autofill servers.
360      form_signature_field_names_.append("&");
361      form_signature_field_names_.append(StripDigitsIfRequired(field->name));
362
363      ++active_field_count_;
364    }
365
366    // Generate a unique name for this field by appending a counter to the name.
367    // Make sure to prepend the counter with a non-numeric digit so that we are
368    // guaranteed to avoid collisions.
369    if (!unique_names.count(field->name))
370      unique_names[field->name] = 1;
371    else
372      ++unique_names[field->name];
373    base::string16 unique_name = field->name + base::ASCIIToUTF16("_") +
374        base::IntToString16(unique_names[field->name]);
375    fields_.push_back(new AutofillField(*field, unique_name));
376  }
377
378  std::string method = base::UTF16ToUTF8(form.method);
379  if (StringToLowerASCII(method) == kFormMethodPost) {
380    method_ = POST;
381  } else {
382    // Either the method is 'get', or we don't know.  In this case we default
383    // to GET.
384    method_ = GET;
385  }
386}
387
// Out-of-line destructor; it performs no explicit work of its own.
FormStructure::~FormStructure() {}
389
390void FormStructure::DetermineHeuristicTypes(
391    const AutofillMetrics& metric_logger) {
392  // First, try to detect field types based on each field's |autocomplete|
393  // attribute value.  If there is at least one form field that specifies an
394  // autocomplete type hint, don't try to apply other heuristics to match fields
395  // in this form.
396  bool has_author_specified_sections;
397  ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_,
398                                            &has_author_specified_sections);
399
400  if (!has_author_specified_types_) {
401    ServerFieldTypeMap field_type_map;
402    FormField::ParseFormFields(fields_.get(), &field_type_map);
403    for (size_t i = 0; i < field_count(); ++i) {
404      AutofillField* field = fields_[i];
405      ServerFieldTypeMap::iterator iter =
406          field_type_map.find(field->unique_name());
407      if (iter != field_type_map.end())
408        field->set_heuristic_type(iter->second);
409    }
410  }
411
412  UpdateAutofillCount();
413  IdentifySections(has_author_specified_sections);
414
415  if (IsAutofillable(true)) {
416    metric_logger.LogDeveloperEngagementMetric(
417        AutofillMetrics::FILLABLE_FORM_PARSED);
418    if (has_author_specified_types_) {
419      metric_logger.LogDeveloperEngagementMetric(
420          AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS);
421    }
422  }
423}
424
425bool FormStructure::EncodeUploadRequest(
426    const ServerFieldTypeSet& available_field_types,
427    bool form_was_autofilled,
428    std::string* encoded_xml) const {
429  DCHECK(ShouldBeCrowdsourced());
430
431  // Verify that |available_field_types| agrees with the possible field types we
432  // are uploading.
433  for (std::vector<AutofillField*>::const_iterator field = begin();
434       field != end();
435       ++field) {
436    for (ServerFieldTypeSet::const_iterator type =
437             (*field)->possible_types().begin();
438         type != (*field)->possible_types().end();
439         ++type) {
440      DCHECK(*type == UNKNOWN_TYPE ||
441             *type == EMPTY_TYPE ||
442             available_field_types.count(*type));
443    }
444  }
445
446  // Set up the <autofillupload> element and its attributes.
447  buzz::XmlElement autofill_request_xml(
448      (buzz::QName(kXMLElementAutofillUpload)));
449  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
450                               kClientVersion);
451  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
452                               FormSignature());
453  autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
454                               form_was_autofilled ? "true" : "false");
455  autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
456                               EncodeFieldTypes(available_field_types).c_str());
457
458  if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
459    return false;  // Malformed form, skip it.
460
461  // Obtain the XML structure as a string.
462  *encoded_xml = kXMLDeclaration;
463  *encoded_xml += autofill_request_xml.Str().c_str();
464
465  // To enable this logging, run with the flag --vmodule="form_structure=2".
466  VLOG(2) << "\n" << *encoded_xml;
467
468  return true;
469}
470
471bool FormStructure::EncodeFieldAssignments(
472    const ServerFieldTypeSet& available_field_types,
473    std::string* encoded_xml) const {
474  DCHECK(ShouldBeCrowdsourced());
475
476  // Set up the <fieldassignments> element and its attributes.
477  buzz::XmlElement autofill_request_xml(
478      (buzz::QName(kXMLElementFieldAssignments)));
479  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
480                               FormSignature());
481
482  if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS,
483                         &autofill_request_xml))
484    return false;  // Malformed form, skip it.
485
486  // Obtain the XML structure as a string.
487  *encoded_xml = kXMLDeclaration;
488  *encoded_xml += autofill_request_xml.Str().c_str();
489
490  return true;
491}
492
493// static
494bool FormStructure::EncodeQueryRequest(
495    const std::vector<FormStructure*>& forms,
496    std::vector<std::string>* encoded_signatures,
497    std::string* encoded_xml) {
498  DCHECK(encoded_signatures);
499  DCHECK(encoded_xml);
500  encoded_xml->clear();
501  encoded_signatures->clear();
502  encoded_signatures->reserve(forms.size());
503
504  // Set up the <autofillquery> element and attributes.
505  buzz::XmlElement autofill_request_xml(
506      (buzz::QName(kXMLElementAutofillQuery)));
507  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
508                               kClientVersion);
509
510  // Some badly formatted web sites repeat forms - detect that and encode only
511  // one form as returned data would be the same for all the repeated forms.
512  std::set<std::string> processed_forms;
513  for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
514       it != forms.end();
515       ++it) {
516    std::string signature((*it)->FormSignature());
517    if (processed_forms.find(signature) != processed_forms.end())
518      continue;
519    processed_forms.insert(signature);
520    scoped_ptr<buzz::XmlElement> encompassing_xml_element(
521        new buzz::XmlElement(buzz::QName(kXMLElementForm)));
522    encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
523                                      signature);
524
525    if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
526                                  encompassing_xml_element.get()))
527      continue;  // Malformed form, skip it.
528
529    autofill_request_xml.AddElement(encompassing_xml_element.release());
530    encoded_signatures->push_back(signature);
531  }
532
533  if (!encoded_signatures->size())
534    return false;
535
536  // Note: Chrome used to also set 'accepts="e"' (where 'e' is for experiments),
537  // but no longer sets this because support for experiments is deprecated.  If
538  // it ever resurfaces, re-add code here to set the attribute accordingly.
539
540  // Obtain the XML structure as a string.
541  *encoded_xml = kXMLDeclaration;
542  *encoded_xml += autofill_request_xml.Str().c_str();
543
544  return true;
545}
546
547// static
548void FormStructure::ParseQueryResponse(
549    const std::string& response_xml,
550    const std::vector<FormStructure*>& forms,
551    const AutofillMetrics& metric_logger) {
552  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED);
553
554  // Parse the field types from the server response to the query.
555  std::vector<AutofillServerFieldInfo> field_infos;
556  UploadRequired upload_required;
557  AutofillQueryXmlParser parse_handler(&field_infos,
558                                       &upload_required);
559  buzz::XmlParser parser(&parse_handler);
560  parser.Parse(response_xml.c_str(), response_xml.length(), true);
561  if (!parse_handler.succeeded())
562    return;
563
564  metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
565
566  bool heuristics_detected_fillable_field = false;
567  bool query_response_overrode_heuristics = false;
568
569  // Copy the field types into the actual form.
570  std::vector<AutofillServerFieldInfo>::iterator current_info =
571      field_infos.begin();
572  for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
573       iter != forms.end(); ++iter) {
574    FormStructure* form = *iter;
575    form->upload_required_ = upload_required;
576
577    for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
578         field != form->fields_.end(); ++field) {
579      if (form->ShouldSkipField(**field))
580        continue;
581
582      // In some cases *successful* response does not return all the fields.
583      // Quit the update of the types then.
584      if (current_info == field_infos.end())
585        break;
586
587      // UNKNOWN_TYPE is reserved for use by the client.
588      DCHECK_NE(current_info->field_type, UNKNOWN_TYPE);
589
590      ServerFieldType heuristic_type = (*field)->heuristic_type();
591      if (heuristic_type != UNKNOWN_TYPE)
592        heuristics_detected_fillable_field = true;
593
594      (*field)->set_server_type(current_info->field_type);
595      if (heuristic_type != (*field)->Type().GetStorableType())
596        query_response_overrode_heuristics = true;
597
598      // Copy default value into the field if available.
599      if (!current_info->default_value.empty())
600        (*field)->set_default_value(current_info->default_value);
601
602      ++current_info;
603    }
604
605    form->UpdateAutofillCount();
606    form->IdentifySections(false);
607  }
608
609  AutofillMetrics::ServerQueryMetric metric;
610  if (query_response_overrode_heuristics) {
611    if (heuristics_detected_fillable_field) {
612      metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
613    } else {
614      metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
615    }
616  } else {
617    metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
618  }
619  metric_logger.LogServerQueryMetric(metric);
620}
621
622// static
623void FormStructure::GetFieldTypePredictions(
624    const std::vector<FormStructure*>& form_structures,
625    std::vector<FormDataPredictions>* forms) {
626  forms->clear();
627  forms->reserve(form_structures.size());
628  for (size_t i = 0; i < form_structures.size(); ++i) {
629    FormStructure* form_structure = form_structures[i];
630    FormDataPredictions form;
631    form.data.name = form_structure->form_name_;
632    form.data.method =
633        base::ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET");
634    form.data.origin = form_structure->source_url_;
635    form.data.action = form_structure->target_url_;
636    form.signature = form_structure->FormSignature();
637
638    for (std::vector<AutofillField*>::const_iterator field =
639             form_structure->fields_.begin();
640         field != form_structure->fields_.end(); ++field) {
641      form.data.fields.push_back(FormFieldData(**field));
642
643      FormFieldDataPredictions annotated_field;
644      annotated_field.signature = (*field)->FieldSignature();
645      annotated_field.heuristic_type =
646          AutofillType((*field)->heuristic_type()).ToString();
647      annotated_field.server_type =
648          AutofillType((*field)->server_type()).ToString();
649      annotated_field.overall_type = (*field)->Type().ToString();
650      form.fields.push_back(annotated_field);
651    }
652
653    forms->push_back(form);
654  }
655}
656
657std::string FormStructure::FormSignature() const {
658  std::string scheme(target_url_.scheme());
659  std::string host(target_url_.host());
660
661  // If target host or scheme is empty, set scheme and host of source url.
662  // This is done to match the Toolbar's behavior.
663  if (scheme.empty() || host.empty()) {
664    scheme = source_url_.scheme();
665    host = source_url_.host();
666  }
667
668  std::string form_string = scheme + "://" + host + "&" +
669                            base::UTF16ToUTF8(form_name_) +
670                            form_signature_field_names_;
671
672  return Hash64Bit(form_string);
673}
674
// Returns |true| for checkable fields.  Within this file, skipped fields are
// left out of the form signature and the active field count, and they do not
// consume an entry of the server's query response.
bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
  return field.is_checkable;
}
678
679bool FormStructure::IsAutofillable(bool require_method_post) const {
680  if (autofill_count() < kRequiredAutofillFields)
681    return false;
682
683  return ShouldBeParsed(require_method_post);
684}
685
686void FormStructure::UpdateAutofillCount() {
687  autofill_count_ = 0;
688  for (std::vector<AutofillField*>::const_iterator iter = begin();
689       iter != end(); ++iter) {
690    AutofillField* field = *iter;
691    if (field && field->IsFieldFillable())
692      ++autofill_count_;
693  }
694}
695
696bool FormStructure::ShouldBeParsed(bool require_method_post) const {
697  if (active_field_count() < kRequiredAutofillFields)
698    return false;
699
700  // Rule out http(s)://*/search?...
701  //  e.g. http://www.google.com/search?q=...
702  //       http://search.yahoo.com/search?p=...
703  if (target_url_.path() == "/search")
704    return false;
705
706  bool has_text_field = false;
707  for (std::vector<AutofillField*>::const_iterator it = begin();
708       it != end() && !has_text_field; ++it) {
709    has_text_field |= (*it)->form_control_type != "select-one";
710  }
711  if (!has_text_field)
712    return false;
713
714  return !require_method_post || (method_ == POST);
715}
716
717bool FormStructure::ShouldBeCrowdsourced() const {
718  return !has_author_specified_types_ && ShouldBeParsed(true);
719}
720
721void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
722  // Map from field signatures to cached fields.
723  std::map<std::string, const AutofillField*> cached_fields;
724  for (size_t i = 0; i < cached_form.field_count(); ++i) {
725    const AutofillField* field = cached_form.field(i);
726    cached_fields[field->FieldSignature()] = field;
727  }
728
729  for (std::vector<AutofillField*>::const_iterator iter = begin();
730       iter != end(); ++iter) {
731    AutofillField* field = *iter;
732
733    std::map<std::string, const AutofillField*>::const_iterator
734        cached_field = cached_fields.find(field->FieldSignature());
735    if (cached_field != cached_fields.end()) {
736      if (field->form_control_type != "select-one" &&
737          field->value == cached_field->second->value) {
738        // From the perspective of learning user data, text fields containing
739        // default values are equivalent to empty fields.
740        field->value = base::string16();
741      }
742
743      field->set_heuristic_type(cached_field->second->heuristic_type());
744      field->set_server_type(cached_field->second->server_type());
745    }
746  }
747
748  UpdateAutofillCount();
749
750  // The form signature should match between query and upload requests to the
751  // server. On many websites, form elements are dynamically added, removed, or
752  // rearranged via JavaScript between page load and form submission, so we
753  // copy over the |form_signature_field_names_| corresponding to the query
754  // request.
755  DCHECK_EQ(cached_form.form_name_, form_name_);
756  DCHECK_EQ(cached_form.source_url_, source_url_);
757  DCHECK_EQ(cached_form.target_url_, target_url_);
758  form_signature_field_names_ = cached_form.form_signature_field_names_;
759}
760
761void FormStructure::LogQualityMetrics(
762    const AutofillMetrics& metric_logger,
763    const base::TimeTicks& load_time,
764    const base::TimeTicks& interaction_time,
765    const base::TimeTicks& submission_time) const {
766  size_t num_detected_field_types = 0;
767  bool did_autofill_all_possible_fields = true;
768  bool did_autofill_some_possible_fields = false;
769  for (size_t i = 0; i < field_count(); ++i) {
770    const AutofillField* field = this->field(i);
771
772    // No further logging for empty fields nor for fields where the entered data
773    // does not appear to already exist in the user's stored Autofill data.
774    const ServerFieldTypeSet& field_types = field->possible_types();
775    DCHECK(!field_types.empty());
776    if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
777      continue;
778
779    // Similarly, no further logging for password fields.  Those are primarily
780    // related to a different feature code path, and so make more sense to track
781    // outside of this metric.
782    if (field->form_control_type == "password")
783      continue;
784
785    ++num_detected_field_types;
786    if (field->is_autofilled)
787      did_autofill_some_possible_fields = true;
788    else
789      did_autofill_all_possible_fields = false;
790
791    // Collapse field types that Chrome treats as identical, e.g. home and
792    // billing address fields.
793    ServerFieldTypeSet collapsed_field_types;
794    for (ServerFieldTypeSet::const_iterator it = field_types.begin();
795         it != field_types.end();
796         ++it) {
797      // Since we currently only support US phone numbers, the (city code + main
798      // digits) number is almost always identical to the whole phone number.
799      // TODO(isherman): Improve this logic once we add support for
800      // international numbers.
801      if (*it == PHONE_HOME_CITY_AND_NUMBER)
802        collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
803      else
804        collapsed_field_types.insert(AutofillType(*it).GetStorableType());
805    }
806
807    // Capture the field's type, if it is unambiguous.
808    ServerFieldType field_type = UNKNOWN_TYPE;
809    if (collapsed_field_types.size() == 1)
810      field_type = *collapsed_field_types.begin();
811
812    ServerFieldType heuristic_type =
813        AutofillType(field->heuristic_type()).GetStorableType();
814    ServerFieldType server_type =
815        AutofillType(field->server_type()).GetStorableType();
816    ServerFieldType predicted_type = field->Type().GetStorableType();
817
818    // Log heuristic, server, and overall type quality metrics, independently of
819    // whether the field was autofilled.
820    if (heuristic_type == UNKNOWN_TYPE) {
821      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
822                                               field_type);
823    } else if (field_types.count(heuristic_type)) {
824      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH,
825                                               field_type);
826    } else {
827      metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH,
828                                               field_type);
829    }
830
831    if (server_type == NO_SERVER_DATA) {
832      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
833                                            field_type);
834    } else if (field_types.count(server_type)) {
835      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH,
836                                            field_type);
837    } else {
838      metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH,
839                                            field_type);
840    }
841
842    if (predicted_type == UNKNOWN_TYPE) {
843      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
844                                             field_type);
845    } else if (field_types.count(predicted_type)) {
846      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH,
847                                             field_type);
848    } else {
849      metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH,
850                                             field_type);
851    }
852  }
853
854  if (num_detected_field_types < kRequiredAutofillFields) {
855    metric_logger.LogUserHappinessMetric(
856        AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM);
857  } else {
858    if (did_autofill_all_possible_fields) {
859      metric_logger.LogUserHappinessMetric(
860          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL);
861    } else if (did_autofill_some_possible_fields) {
862      metric_logger.LogUserHappinessMetric(
863          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME);
864    } else {
865      metric_logger.LogUserHappinessMetric(
866          AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE);
867    }
868
869    // Unlike the other times, the |submission_time| should always be available.
870    DCHECK(!submission_time.is_null());
871
872    // The |load_time| might be unset, in the case that the form was dynamically
873    // added to the DOM.
874    if (!load_time.is_null()) {
875      // Submission should always chronologically follow form load.
876      DCHECK(submission_time > load_time);
877      base::TimeDelta elapsed = submission_time - load_time;
878      if (did_autofill_some_possible_fields)
879        metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed);
880      else
881        metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed);
882    }
883
884    // The |interaction_time| might be unset, in the case that the user
885    // submitted a blank form.
886    if (!interaction_time.is_null()) {
887      // Submission should always chronologically follow interaction.
888      DCHECK(submission_time > interaction_time);
889      base::TimeDelta elapsed = submission_time - interaction_time;
890      if (did_autofill_some_possible_fields) {
891        metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed);
892      } else {
893        metric_logger.LogFormFillDurationFromInteractionWithoutAutofill(
894            elapsed);
895      }
896    }
897  }
898}
899
900const AutofillField* FormStructure::field(size_t index) const {
901  if (index >= fields_.size()) {
902    NOTREACHED();
903    return NULL;
904  }
905
906  return fields_[index];
907}
908
909AutofillField* FormStructure::field(size_t index) {
910  return const_cast<AutofillField*>(
911      static_cast<const FormStructure*>(this)->field(index));
912}
913
914size_t FormStructure::field_count() const {
915  return fields_.size();
916}
917
918size_t FormStructure::active_field_count() const {
919  return active_field_count_;
920}
921
922FormData FormStructure::ToFormData() const {
923  // |data.user_submitted| will always be false.
924  FormData data;
925  data.name = form_name_;
926  data.origin = source_url_;
927  data.action = target_url_;
928  data.method = base::ASCIIToUTF16(method_ == POST ? "POST" : "GET");
929
930  for (size_t i = 0; i < fields_.size(); ++i) {
931    data.fields.push_back(FormFieldData(*fields_[i]));
932  }
933
934  return data;
935}
936
937bool FormStructure::operator==(const FormData& form) const {
938  // TODO(jhawkins): Is this enough to differentiate a form?
939  if (form_name_ == form.name &&
940      source_url_ == form.origin &&
941      target_url_ == form.action) {
942    return true;
943  }
944
945  // TODO(jhawkins): Compare field names, IDs and labels once we have labels
946  // set up.
947
948  return false;
949}
950
951bool FormStructure::operator!=(const FormData& form) const {
952  return !operator==(form);
953}
954
955std::string FormStructure::Hash64Bit(const std::string& str) {
956  std::string hash_bin = base::SHA1HashString(str);
957  DCHECK_EQ(20U, hash_bin.length());
958
959  uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
960                  (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
961                  (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
962                  (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
963                  (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
964                  (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
965                  (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
966                   ((static_cast<uint64>(hash_bin[7])) & 0xFF);
967
968  return base::Uint64ToString(hash64);
969}
970
971bool FormStructure::EncodeFormRequest(
972    FormStructure::EncodeRequestType request_type,
973    buzz::XmlElement* encompassing_xml_element) const {
974  if (!field_count())  // Nothing to add.
975    return false;
976
977  // Some badly formatted web sites repeat fields - limit number of fields to
978  // 48, which is far larger than any valid form and XML still fits into 2K.
979  // Do not send requests for forms with more than this many fields, as they are
980  // near certainly not valid/auto-fillable.
981  const size_t kMaxFieldsOnTheForm = 48;
982  if (field_count() > kMaxFieldsOnTheForm)
983    return false;
984
985  // Add the child nodes for the form fields.
986  for (size_t index = 0; index < field_count(); ++index) {
987    const AutofillField* field = fields_[index];
988    switch (request_type) {
989      case FormStructure::UPLOAD:
990        EncodeFieldForUpload(*field, encompassing_xml_element);
991        break;
992      case FormStructure::QUERY:
993        if (ShouldSkipField(*field))
994          continue;
995        EncodeFieldForQuery(*field, encompassing_xml_element);
996        break;
997      case FormStructure::FIELD_ASSIGNMENTS:
998        EncodeFieldForFieldAssignments(*field, encompassing_xml_element);
999        break;
1000    }
1001  }
1002  return true;
1003}
1004
1005void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
1006    bool* found_types,
1007    bool* found_sections) {
1008  const std::string kDefaultSection = "-default";
1009
1010  *found_types = false;
1011  *found_sections = false;
1012  for (std::vector<AutofillField*>::iterator it = fields_.begin();
1013       it != fields_.end(); ++it) {
1014    AutofillField* field = *it;
1015
1016    // To prevent potential section name collisions, add a default suffix for
1017    // other fields.  Without this, 'autocomplete' attribute values
1018    // "section--shipping street-address" and "shipping street-address" would be
1019    // parsed identically, given the section handling code below.  We do this
1020    // before any validation so that fields with invalid attributes still end up
1021    // in the default section.  These default section names will be overridden
1022    // by subsequent heuristic parsing steps if there are no author-specified
1023    // section names.
1024    field->set_section(kDefaultSection);
1025
1026    // Canonicalize the attribute value by trimming whitespace, collapsing
1027    // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1028    std::string autocomplete_attribute =
1029        base::CollapseWhitespaceASCII(field->autocomplete_attribute, false);
1030    autocomplete_attribute = StringToLowerASCII(autocomplete_attribute);
1031
1032    // The autocomplete attribute is overloaded: it can specify either a field
1033    // type hint or whether autocomplete should be enabled at all.  Ignore the
1034    // latter type of attribute value.
1035    if (autocomplete_attribute.empty() ||
1036        autocomplete_attribute == "on" ||
1037        autocomplete_attribute == "off") {
1038      continue;
1039    }
1040
1041    // Any other value, even it is invalid, is considered to be a type hint.
1042    // This allows a website's author to specify an attribute like
1043    // autocomplete="other" on a field to disable all Autofill heuristics for
1044    // the form.
1045    *found_types = true;
1046
1047    // Tokenize the attribute value.  Per the spec, the tokens are parsed in
1048    // reverse order.
1049    std::vector<std::string> tokens;
1050    Tokenize(autocomplete_attribute, " ", &tokens);
1051
1052    // The final token must be the field type.
1053    // If it is not one of the known types, abort.
1054    DCHECK(!tokens.empty());
1055    std::string field_type_token = tokens.back();
1056    tokens.pop_back();
1057    HtmlFieldType field_type =
1058        FieldTypeFromAutocompleteAttributeValue(field_type_token, *field);
1059    if (field_type == HTML_TYPE_UNKNOWN)
1060      continue;
1061
1062    // The preceding token, if any, may be a type hint.
1063    if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1064      // If it is, it must match the field type; otherwise, abort.
1065      // Note that an invalid token invalidates the entire attribute value, even
1066      // if the other tokens are valid.
1067      if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1068        continue;
1069
1070      // Chrome Autofill ignores these type hints.
1071      tokens.pop_back();
1072    }
1073
1074    // The preceding token, if any, may be a fixed string that is either
1075    // "shipping" or "billing".  Chrome Autofill treats these as implicit
1076    // section name suffixes.
1077    DCHECK_EQ(kDefaultSection, field->section());
1078    std::string section = field->section();
1079    HtmlFieldMode mode = HTML_MODE_NONE;
1080    if (!tokens.empty()) {
1081      if (tokens.back() == kShippingMode)
1082        mode = HTML_MODE_SHIPPING;
1083      else if (tokens.back() == kBillingMode)
1084        mode = HTML_MODE_BILLING;
1085    }
1086
1087    if (mode != HTML_MODE_NONE) {
1088      section = "-" + tokens.back();
1089      tokens.pop_back();
1090    }
1091
1092    // The preceding token, if any, may be a named section.
1093    const std::string kSectionPrefix = "section-";
1094    if (!tokens.empty() &&
1095        StartsWithASCII(tokens.back(), kSectionPrefix, true)) {
1096      // Prepend this section name to the suffix set in the preceding block.
1097      section = tokens.back().substr(kSectionPrefix.size()) + section;
1098      tokens.pop_back();
1099    }
1100
1101    // No other tokens are allowed.  If there are any remaining, abort.
1102    if (!tokens.empty())
1103      continue;
1104
1105    if (section != kDefaultSection) {
1106      *found_sections = true;
1107      field->set_section(section);
1108    }
1109
1110    // No errors encountered while parsing!
1111    // Update the |field|'s type based on what was parsed from the attribute.
1112    field->SetHtmlType(field_type, mode);
1113  }
1114}
1115
1116bool FormStructure::FillFields(
1117    const std::vector<ServerFieldType>& types,
1118    const InputFieldComparator& matches,
1119    const base::Callback<base::string16(const AutofillType&)>& get_info,
1120    const std::string& app_locale) {
1121  bool filled_something = false;
1122  for (size_t i = 0; i < field_count(); ++i) {
1123    for (size_t j = 0; j < types.size(); ++j) {
1124      if (matches.Run(types[j], *field(i))) {
1125        AutofillField::FillFormField(*field(i),
1126                                     get_info.Run(field(i)->Type()),
1127                                     app_locale,
1128                                     field(i));
1129        filled_something = true;
1130        break;
1131      }
1132    }
1133  }
1134  return filled_something;
1135}
1136
1137std::set<base::string16> FormStructure::PossibleValues(ServerFieldType type) {
1138  std::set<base::string16> values;
1139  AutofillType target_type(type);
1140  for (std::vector<AutofillField*>::iterator iter = fields_.begin();
1141       iter != fields_.end(); ++iter) {
1142    AutofillField* field = *iter;
1143    if (field->Type().GetStorableType() != target_type.GetStorableType() ||
1144        field->Type().group() != target_type.group()) {
1145      continue;
1146    }
1147
1148    // No option values; anything goes.
1149    if (field->option_values.empty())
1150      return std::set<base::string16>();
1151
1152    for (size_t i = 0; i < field->option_values.size(); ++i) {
1153      if (!field->option_values[i].empty())
1154        values.insert(base::i18n::ToUpper(field->option_values[i]));
1155    }
1156
1157    for (size_t i = 0; i < field->option_contents.size(); ++i) {
1158      if (!field->option_contents[i].empty())
1159        values.insert(base::i18n::ToUpper(field->option_contents[i]));
1160    }
1161  }
1162
1163  return values;
1164}
1165
1166void FormStructure::IdentifySections(bool has_author_specified_sections) {
1167  if (fields_.empty())
1168    return;
1169
1170  if (!has_author_specified_sections) {
1171    // Name sections after the first field in the section.
1172    base::string16 current_section = fields_.front()->unique_name();
1173
1174    // Keep track of the types we've seen in this section.
1175    std::set<ServerFieldType> seen_types;
1176    ServerFieldType previous_type = UNKNOWN_TYPE;
1177
1178    for (std::vector<AutofillField*>::iterator field = fields_.begin();
1179         field != fields_.end(); ++field) {
1180      const ServerFieldType current_type = (*field)->Type().GetStorableType();
1181
1182      bool already_saw_current_type = seen_types.count(current_type) > 0;
1183
1184      // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1185      // evening phone number.  Our phone number detection is also generally a
1186      // little off.  Hence, ignore this field type as a signal here.
1187      if (AutofillType(current_type).group() == PHONE_HOME)
1188        already_saw_current_type = false;
1189
1190      // Some forms have adjacent fields of the same type.  Two common examples:
1191      //  * Forms with two email fields, where the second is meant to "confirm"
1192      //    the first.
1193      //  * Forms with a <select> menu for states in some countries, and a
1194      //    freeform <input> field for states in other countries.  (Usually,
1195      //    only one of these two will be visible for any given choice of
1196      //    country.)
1197      // Generally, adjacent fields of the same type belong in the same logical
1198      // section.
1199      if (current_type == previous_type)
1200        already_saw_current_type = false;
1201
1202      previous_type = current_type;
1203
1204      if (current_type != UNKNOWN_TYPE && already_saw_current_type) {
1205        // We reached the end of a section, so start a new section.
1206        seen_types.clear();
1207        current_section = (*field)->unique_name();
1208      }
1209
1210      seen_types.insert(current_type);
1211      (*field)->set_section(base::UTF16ToUTF8(current_section));
1212    }
1213  }
1214
1215  // Ensure that credit card and address fields are in separate sections.
1216  // This simplifies the section-aware logic in autofill_manager.cc.
1217  for (std::vector<AutofillField*>::iterator field = fields_.begin();
1218       field != fields_.end(); ++field) {
1219    FieldTypeGroup field_type_group = (*field)->Type().group();
1220    if (field_type_group == CREDIT_CARD)
1221      (*field)->set_section((*field)->section() + "-cc");
1222    else
1223      (*field)->set_section((*field)->section() + "-default");
1224  }
1225}
1226
1227}  // namespace autofill
1228