form_structure.cc revision 58537e28ecd584eab876aee8be7156509866d23a
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/autofill/core/browser/form_structure.h" 6 7#include <utility> 8 9#include "base/basictypes.h" 10#include "base/command_line.h" 11#include "base/logging.h" 12#include "base/memory/scoped_ptr.h" 13#include "base/sha1.h" 14#include "base/strings/string_number_conversions.h" 15#include "base/strings/string_util.h" 16#include "base/strings/stringprintf.h" 17#include "base/strings/utf_string_conversions.h" 18#include "base/time/time.h" 19#include "components/autofill/core/browser/autofill_metrics.h" 20#include "components/autofill/core/browser/autofill_type.h" 21#include "components/autofill/core/browser/autofill_xml_parser.h" 22#include "components/autofill/core/browser/field_types.h" 23#include "components/autofill/core/browser/form_field.h" 24#include "components/autofill/core/common/autofill_constants.h" 25#include "components/autofill/core/common/form_data.h" 26#include "components/autofill/core/common/form_data_predictions.h" 27#include "components/autofill/core/common/form_field_data.h" 28#include "components/autofill/core/common/form_field_data_predictions.h" 29#include "third_party/icu/source/i18n/unicode/regex.h" 30#include "third_party/libjingle/source/talk/xmllite/xmlelement.h" 31 32namespace autofill { 33namespace { 34 35const char kFormMethodPost[] = "post"; 36 37// XML elements and attributes. 38const char kAttributeAcceptedFeatures[] = "accepts"; 39const char kAttributeAutofillUsed[] = "autofillused"; 40const char kAttributeAutofillType[] = "autofilltype"; 41const char kAttributeClientVersion[] = "clientversion"; 42const char kAttributeDataPresent[] = "datapresent"; 43const char kAttributeFieldID[] = "fieldid"; 44const char kAttributeFieldType[] = "fieldtype"; 45const char kAttributeFormSignature[] = "formsignature"; 46const char kAttributeName[] = "name"; 47const char kAttributeSignature[] = "signature"; 48const char kAttributeUrlprefixSignature[] = "urlprefixsignature"; 49const char kAcceptedFeaturesExperiment[] = "e"; // e=experiments 50const char kClientVersion[] = "6.1.1715.1442/en (GGLL)"; 51const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; 52const char kXMLElementAutofillQuery[] = "autofillquery"; 53const char kXMLElementAutofillUpload[] = "autofillupload"; 54const char kXMLElementFieldAssignments[] = "fieldassignments"; 55const char kXMLElementField[] = "field"; 56const char kXMLElementFields[] = "fields"; 57const char kXMLElementForm[] = "form"; 58const char kBillingMode[] = "billing"; 59const char kShippingMode[] = "shipping"; 60 61// Stip away >= 5 consecutive digits. 62const char kIgnorePatternInFieldName[] = "\\d{5,}+"; 63 64// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to 65// |available_field_types| and returns the hex representation as a string. 66std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) { 67 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte, 68 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field. 69 const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8; 70 71 // Pack the types in |available_field_types| into |bit_field|. 72 std::vector<uint8> bit_field(kNumBytes, 0); 73 for (ServerFieldTypeSet::const_iterator field_type = 74 available_field_types.begin(); 75 field_type != available_field_types.end(); 76 ++field_type) { 77 // Set the appropriate bit in the field. The bit we set is the one 78 // |field_type| % 8 from the left of the byte. 79 const size_t byte = *field_type / 8; 80 const size_t bit = 0x80 >> (*field_type % 8); 81 DCHECK(byte < bit_field.size()); 82 bit_field[byte] |= bit; 83 } 84 85 // Discard any trailing zeroes. 86 // If there are no available types, we return the empty string. 87 size_t data_end = bit_field.size(); 88 for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) { 89 } 90 91 // Print all meaningfull bytes into a string. 92 std::string data_presence; 93 data_presence.reserve(data_end * 2 + 1); 94 for (size_t i = 0; i < data_end; ++i) { 95 base::StringAppendF(&data_presence, "%02x", bit_field[i]); 96 } 97 98 return data_presence; 99} 100 101// Helper for |EncodeFormRequest()| that creates XmlElements for the given field 102// in upload xml, and also add them to the parent XmlElement. 103void EncodeFieldForUpload(const AutofillField& field, 104 buzz::XmlElement* parent) { 105 // Don't upload checkable fields. 106 if (field.is_checkable) 107 return; 108 109 ServerFieldTypeSet types = field.possible_types(); 110 // |types| could be empty in unit-tests only. 111 for (ServerFieldTypeSet::iterator field_type = types.begin(); 112 field_type != types.end(); ++field_type) { 113 buzz::XmlElement *field_element = new buzz::XmlElement( 114 buzz::QName(kXMLElementField)); 115 116 field_element->SetAttr(buzz::QName(kAttributeSignature), 117 field.FieldSignature()); 118 field_element->SetAttr(buzz::QName(kAttributeAutofillType), 119 base::IntToString(*field_type)); 120 parent->AddElement(field_element); 121 } 122} 123 124// Helper for |EncodeFormRequest()| that creates XmlElement for the given field 125// in query xml, and also add it to the parent XmlElement. 126void EncodeFieldForQuery(const AutofillField& field, 127 buzz::XmlElement* parent) { 128 buzz::XmlElement *field_element = new buzz::XmlElement( 129 buzz::QName(kXMLElementField)); 130 field_element->SetAttr(buzz::QName(kAttributeSignature), 131 field.FieldSignature()); 132 parent->AddElement(field_element); 133} 134 135// Helper for |EncodeFormRequest()| that creates XmlElements for the given field 136// in field assignments xml, and also add them to the parent XmlElement. 137void EncodeFieldForFieldAssignments(const AutofillField& field, 138 buzz::XmlElement* parent) { 139 ServerFieldTypeSet types = field.possible_types(); 140 for (ServerFieldTypeSet::iterator field_type = types.begin(); 141 field_type != types.end(); ++field_type) { 142 buzz::XmlElement *field_element = new buzz::XmlElement( 143 buzz::QName(kXMLElementFields)); 144 145 field_element->SetAttr(buzz::QName(kAttributeFieldID), 146 field.FieldSignature()); 147 field_element->SetAttr(buzz::QName(kAttributeFieldType), 148 base::IntToString(*field_type)); 149 field_element->SetAttr(buzz::QName(kAttributeName), 150 UTF16ToUTF8(field.name)); 151 parent->AddElement(field_element); 152 } 153} 154 155// Returns |true| iff the |token| is a type hint for a contact field, as 156// specified in the implementation section of http://is.gd/whatwg_autocomplete 157// Note that "fax" and "pager" are intentionally ignored, as Chrome does not 158// support filling either type of information. 159bool IsContactTypeHint(const std::string& token) { 160 return token == "home" || token == "work" || token == "mobile"; 161} 162 163// Returns |true| iff the |token| is a type hint appropriate for a field of the 164// given |field_type|, as specified in the implementation section of 165// http://is.gd/whatwg_autocomplete 166bool ContactTypeHintMatchesFieldType(const std::string& token, 167 HtmlFieldType field_type) { 168 // The "home" and "work" type hints are only appropriate for email and phone 169 // number field types. 170 if (token == "home" || token == "work") { 171 return field_type == HTML_TYPE_EMAIL || 172 (field_type >= HTML_TYPE_TEL && 173 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX); 174 } 175 176 // The "mobile" type hint is only appropriate for phone number field types. 177 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not 178 // support filling either type of information. 179 if (token == "mobile") { 180 return field_type >= HTML_TYPE_TEL && 181 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX; 182 } 183 184 return false; 185} 186 187// Returns the Chrome Autofill-supported field type corresponding to the given 188// |autocomplete_attribute_value|, if there is one, in the context of the given 189// |field|. Chrome Autofill supports a subset of the field types listed at 190// http://is.gd/whatwg_autocomplete 191HtmlFieldType FieldTypeFromAutocompleteAttributeValue( 192 const std::string& autocomplete_attribute_value, 193 const AutofillField& field) { 194 if (autocomplete_attribute_value == "name") 195 return HTML_TYPE_NAME; 196 197 if (autocomplete_attribute_value == "given-name") 198 return HTML_TYPE_GIVEN_NAME; 199 200 if (autocomplete_attribute_value == "additional-name") { 201 if (field.max_length == 1) 202 return HTML_TYPE_ADDITIONAL_NAME_INITIAL; 203 else 204 return HTML_TYPE_ADDITIONAL_NAME; 205 } 206 207 if (autocomplete_attribute_value == "family-name") 208 return HTML_TYPE_FAMILY_NAME; 209 210 if (autocomplete_attribute_value == "organization") 211 return HTML_TYPE_ORGANIZATION; 212 213 if (autocomplete_attribute_value == "street-address") 214 return HTML_TYPE_STREET_ADDRESS; 215 216 if (autocomplete_attribute_value == "address-line1") 217 return HTML_TYPE_ADDRESS_LINE1; 218 219 if (autocomplete_attribute_value == "address-line2") 220 return HTML_TYPE_ADDRESS_LINE2; 221 222 if (autocomplete_attribute_value == "locality") 223 return HTML_TYPE_LOCALITY; 224 225 if (autocomplete_attribute_value == "region") 226 return HTML_TYPE_REGION; 227 228 if (autocomplete_attribute_value == "country") 229 return HTML_TYPE_COUNTRY_CODE; 230 231 if (autocomplete_attribute_value == "country-name") 232 return HTML_TYPE_COUNTRY_NAME; 233 234 if (autocomplete_attribute_value == "postal-code") 235 return HTML_TYPE_POSTAL_CODE; 236 237 if (autocomplete_attribute_value == "cc-name") 238 return HTML_TYPE_CREDIT_CARD_NAME; 239 240 if (autocomplete_attribute_value == "cc-number") 241 return HTML_TYPE_CREDIT_CARD_NUMBER; 242 243 if (autocomplete_attribute_value == "cc-exp") { 244 if (field.max_length == 5) 245 return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR; 246 else if (field.max_length == 7) 247 return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR; 248 else 249 return HTML_TYPE_CREDIT_CARD_EXP; 250 } 251 252 if (autocomplete_attribute_value == "cc-exp-month") 253 return HTML_TYPE_CREDIT_CARD_EXP_MONTH; 254 255 if (autocomplete_attribute_value == "cc-exp-year") { 256 if (field.max_length == 2) 257 return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR; 258 else if (field.max_length == 4) 259 return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR; 260 else 261 return HTML_TYPE_CREDIT_CARD_EXP_YEAR; 262 } 263 264 if (autocomplete_attribute_value == "cc-csc") 265 return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE; 266 267 if (autocomplete_attribute_value == "cc-type") 268 return HTML_TYPE_CREDIT_CARD_TYPE; 269 270 if (autocomplete_attribute_value == "tel") 271 return HTML_TYPE_TEL; 272 273 if (autocomplete_attribute_value == "tel-country-code") 274 return HTML_TYPE_TEL_COUNTRY_CODE; 275 276 if (autocomplete_attribute_value == "tel-national") 277 return HTML_TYPE_TEL_NATIONAL; 278 279 if (autocomplete_attribute_value == "tel-area-code") 280 return HTML_TYPE_TEL_AREA_CODE; 281 282 if (autocomplete_attribute_value == "tel-local") 283 return HTML_TYPE_TEL_LOCAL; 284 285 if (autocomplete_attribute_value == "tel-local-prefix") 286 return HTML_TYPE_TEL_LOCAL_PREFIX; 287 288 if (autocomplete_attribute_value == "tel-local-suffix") 289 return HTML_TYPE_TEL_LOCAL_SUFFIX; 290 291 if (autocomplete_attribute_value == "email") 292 return HTML_TYPE_EMAIL; 293 294 return HTML_TYPE_UNKNOWN; 295} 296 297std::string StripDigitsIfRequired(const base::string16& input) { 298 UErrorCode status = U_ZERO_ERROR; 299 CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern, 300 (kIgnorePatternInFieldName)); 301 CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher, 302 (icu_pattern, UREGEX_CASE_INSENSITIVE, status)); 303 DCHECK_EQ(status, U_ZERO_ERROR); 304 305 icu::UnicodeString icu_input(input.data(), input.length()); 306 matcher.reset(icu_input); 307 308 icu::UnicodeString replaced_string = matcher.replaceAll("", status); 309 310 std::string return_string; 311 status = U_ZERO_ERROR; 312 UTF16ToUTF8(replaced_string.getBuffer(), 313 static_cast<size_t>(replaced_string.length()), 314 &return_string); 315 if (status != U_ZERO_ERROR) { 316 DVLOG(1) << "Couldn't strip digits in " << UTF16ToUTF8(input); 317 return UTF16ToUTF8(input); 318 } 319 320 return return_string; 321} 322 323} // namespace 324 325FormStructure::FormStructure(const FormData& form) 326 : form_name_(form.name), 327 source_url_(form.origin), 328 target_url_(form.action), 329 autofill_count_(0), 330 active_field_count_(0), 331 upload_required_(USE_UPLOAD_RATES), 332 server_experiment_id_("no server response"), 333 has_author_specified_types_(false) { 334 // Copy the form fields. 335 std::map<base::string16, size_t> unique_names; 336 for (std::vector<FormFieldData>::const_iterator field = 337 form.fields.begin(); 338 field != form.fields.end(); field++) { 339 340 if (!ShouldSkipField(*field)) { 341 // Add all supported form fields (including with empty names) to the 342 // signature. This is a requirement for Autofill servers. 343 form_signature_field_names_.append("&"); 344 form_signature_field_names_.append(StripDigitsIfRequired(field->name)); 345 346 ++active_field_count_; 347 } 348 349 // Generate a unique name for this field by appending a counter to the name. 350 // Make sure to prepend the counter with a non-numeric digit so that we are 351 // guaranteed to avoid collisions. 352 if (!unique_names.count(field->name)) 353 unique_names[field->name] = 1; 354 else 355 ++unique_names[field->name]; 356 base::string16 unique_name = field->name + ASCIIToUTF16("_") + 357 base::IntToString16(unique_names[field->name]); 358 fields_.push_back(new AutofillField(*field, unique_name)); 359 } 360 361 std::string method = UTF16ToUTF8(form.method); 362 if (StringToLowerASCII(method) == kFormMethodPost) { 363 method_ = POST; 364 } else { 365 // Either the method is 'get', or we don't know. In this case we default 366 // to GET. 367 method_ = GET; 368 } 369} 370 371FormStructure::~FormStructure() {} 372 373void FormStructure::DetermineHeuristicTypes( 374 const AutofillMetrics& metric_logger) { 375 // First, try to detect field types based on each field's |autocomplete| 376 // attribute value. If there is at least one form field that specifies an 377 // autocomplete type hint, don't try to apply other heuristics to match fields 378 // in this form. 379 bool has_author_specified_sections; 380 ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_, 381 &has_author_specified_sections); 382 383 if (!has_author_specified_types_) { 384 ServerFieldTypeMap field_type_map; 385 FormField::ParseFormFields(fields_.get(), &field_type_map); 386 for (size_t i = 0; i < field_count(); ++i) { 387 AutofillField* field = fields_[i]; 388 ServerFieldTypeMap::iterator iter = 389 field_type_map.find(field->unique_name()); 390 if (iter != field_type_map.end()) 391 field->set_heuristic_type(iter->second); 392 } 393 } 394 395 UpdateAutofillCount(); 396 IdentifySections(has_author_specified_sections); 397 398 if (IsAutofillable(true)) { 399 metric_logger.LogDeveloperEngagementMetric( 400 AutofillMetrics::FILLABLE_FORM_PARSED); 401 if (has_author_specified_types_) { 402 metric_logger.LogDeveloperEngagementMetric( 403 AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS); 404 } 405 } 406} 407 408bool FormStructure::EncodeUploadRequest( 409 const ServerFieldTypeSet& available_field_types, 410 bool form_was_autofilled, 411 std::string* encoded_xml) const { 412 DCHECK(ShouldBeCrowdsourced()); 413 414 // Verify that |available_field_types| agrees with the possible field types we 415 // are uploading. 416 for (std::vector<AutofillField*>::const_iterator field = begin(); 417 field != end(); 418 ++field) { 419 for (ServerFieldTypeSet::const_iterator type = 420 (*field)->possible_types().begin(); 421 type != (*field)->possible_types().end(); 422 ++type) { 423 DCHECK(*type == UNKNOWN_TYPE || 424 *type == EMPTY_TYPE || 425 available_field_types.count(*type)); 426 } 427 } 428 429 // Set up the <autofillupload> element and its attributes. 430 buzz::XmlElement autofill_request_xml( 431 (buzz::QName(kXMLElementAutofillUpload))); 432 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion), 433 kClientVersion); 434 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature), 435 FormSignature()); 436 autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed), 437 form_was_autofilled ? "true" : "false"); 438 autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent), 439 EncodeFieldTypes(available_field_types).c_str()); 440 441 if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml)) 442 return false; // Malformed form, skip it. 443 444 // Obtain the XML structure as a string. 445 *encoded_xml = kXMLDeclaration; 446 *encoded_xml += autofill_request_xml.Str().c_str(); 447 448 // To enable this logging, run with the flag --vmodule="form_structure=2". 449 VLOG(2) << "\n" << *encoded_xml; 450 451 return true; 452} 453 454bool FormStructure::EncodeFieldAssignments( 455 const ServerFieldTypeSet& available_field_types, 456 std::string* encoded_xml) const { 457 DCHECK(ShouldBeCrowdsourced()); 458 459 // Set up the <fieldassignments> element and its attributes. 460 buzz::XmlElement autofill_request_xml( 461 (buzz::QName(kXMLElementFieldAssignments))); 462 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature), 463 FormSignature()); 464 465 if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS, 466 &autofill_request_xml)) 467 return false; // Malformed form, skip it. 468 469 // Obtain the XML structure as a string. 470 *encoded_xml = kXMLDeclaration; 471 *encoded_xml += autofill_request_xml.Str().c_str(); 472 473 return true; 474} 475 476// static 477bool FormStructure::EncodeQueryRequest( 478 const std::vector<FormStructure*>& forms, 479 std::vector<std::string>* encoded_signatures, 480 std::string* encoded_xml) { 481 DCHECK(encoded_signatures); 482 DCHECK(encoded_xml); 483 encoded_xml->clear(); 484 encoded_signatures->clear(); 485 encoded_signatures->reserve(forms.size()); 486 487 // Set up the <autofillquery> element and attributes. 488 buzz::XmlElement autofill_request_xml( 489 (buzz::QName(kXMLElementAutofillQuery))); 490 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion), 491 kClientVersion); 492 493 // Some badly formatted web sites repeat forms - detect that and encode only 494 // one form as returned data would be the same for all the repeated forms. 495 std::set<std::string> processed_forms; 496 for (ScopedVector<FormStructure>::const_iterator it = forms.begin(); 497 it != forms.end(); 498 ++it) { 499 std::string signature((*it)->FormSignature()); 500 if (processed_forms.find(signature) != processed_forms.end()) 501 continue; 502 processed_forms.insert(signature); 503 scoped_ptr<buzz::XmlElement> encompassing_xml_element( 504 new buzz::XmlElement(buzz::QName(kXMLElementForm))); 505 encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature), 506 signature); 507 508 if (!(*it)->EncodeFormRequest(FormStructure::QUERY, 509 encompassing_xml_element.get())) 510 continue; // Malformed form, skip it. 511 512 autofill_request_xml.AddElement(encompassing_xml_element.release()); 513 encoded_signatures->push_back(signature); 514 } 515 516 if (!encoded_signatures->size()) 517 return false; 518 519 autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures), 520 kAcceptedFeaturesExperiment); 521 522 // Obtain the XML structure as a string. 523 *encoded_xml = kXMLDeclaration; 524 *encoded_xml += autofill_request_xml.Str().c_str(); 525 526 return true; 527} 528 529// static 530void FormStructure::ParseQueryResponse( 531 const std::string& response_xml, 532 const std::vector<FormStructure*>& forms, 533 const AutofillMetrics& metric_logger) { 534 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED); 535 536 // Parse the field types from the server response to the query. 537 std::vector<AutofillServerFieldInfo> field_infos; 538 UploadRequired upload_required; 539 std::string experiment_id; 540 AutofillQueryXmlParser parse_handler(&field_infos, 541 &upload_required, 542 &experiment_id); 543 buzz::XmlParser parser(&parse_handler); 544 parser.Parse(response_xml.c_str(), response_xml.length(), true); 545 if (!parse_handler.succeeded()) 546 return; 547 548 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED); 549 metric_logger.LogServerExperimentIdForQuery(experiment_id); 550 551 bool heuristics_detected_fillable_field = false; 552 bool query_response_overrode_heuristics = false; 553 554 // Copy the field types into the actual form. 555 std::vector<AutofillServerFieldInfo>::iterator current_info = 556 field_infos.begin(); 557 for (std::vector<FormStructure*>::const_iterator iter = forms.begin(); 558 iter != forms.end(); ++iter) { 559 FormStructure* form = *iter; 560 form->upload_required_ = upload_required; 561 form->server_experiment_id_ = experiment_id; 562 563 for (std::vector<AutofillField*>::iterator field = form->fields_.begin(); 564 field != form->fields_.end(); ++field) { 565 if (form->ShouldSkipField(**field)) 566 continue; 567 568 // In some cases *successful* response does not return all the fields. 569 // Quit the update of the types then. 570 if (current_info == field_infos.end()) 571 break; 572 573 // UNKNOWN_TYPE is reserved for use by the client. 574 DCHECK_NE(current_info->field_type, UNKNOWN_TYPE); 575 576 ServerFieldType heuristic_type = (*field)->heuristic_type(); 577 if (heuristic_type != UNKNOWN_TYPE) 578 heuristics_detected_fillable_field = true; 579 580 (*field)->set_server_type(current_info->field_type); 581 if (heuristic_type != (*field)->Type().GetStorableType()) 582 query_response_overrode_heuristics = true; 583 584 // Copy default value into the field if available. 585 if (!current_info->default_value.empty()) 586 (*field)->set_default_value(current_info->default_value); 587 588 ++current_info; 589 } 590 591 form->UpdateAutofillCount(); 592 form->IdentifySections(false); 593 } 594 595 AutofillMetrics::ServerQueryMetric metric; 596 if (query_response_overrode_heuristics) { 597 if (heuristics_detected_fillable_field) { 598 metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS; 599 } else { 600 metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS; 601 } 602 } else { 603 metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS; 604 } 605 metric_logger.LogServerQueryMetric(metric); 606} 607 608// static 609void FormStructure::GetFieldTypePredictions( 610 const std::vector<FormStructure*>& form_structures, 611 std::vector<FormDataPredictions>* forms) { 612 forms->clear(); 613 forms->reserve(form_structures.size()); 614 for (size_t i = 0; i < form_structures.size(); ++i) { 615 FormStructure* form_structure = form_structures[i]; 616 FormDataPredictions form; 617 form.data.name = form_structure->form_name_; 618 form.data.method = 619 ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET"); 620 form.data.origin = form_structure->source_url_; 621 form.data.action = form_structure->target_url_; 622 form.signature = form_structure->FormSignature(); 623 form.experiment_id = form_structure->server_experiment_id_; 624 625 for (std::vector<AutofillField*>::const_iterator field = 626 form_structure->fields_.begin(); 627 field != form_structure->fields_.end(); ++field) { 628 form.data.fields.push_back(FormFieldData(**field)); 629 630 FormFieldDataPredictions annotated_field; 631 annotated_field.signature = (*field)->FieldSignature(); 632 annotated_field.heuristic_type = 633 AutofillType((*field)->heuristic_type()).ToString(); 634 annotated_field.server_type = 635 AutofillType((*field)->server_type()).ToString(); 636 annotated_field.overall_type = (*field)->Type().ToString(); 637 form.fields.push_back(annotated_field); 638 } 639 640 forms->push_back(form); 641 } 642} 643 644std::string FormStructure::FormSignature() const { 645 std::string scheme(target_url_.scheme()); 646 std::string host(target_url_.host()); 647 648 // If target host or scheme is empty, set scheme and host of source url. 649 // This is done to match the Toolbar's behavior. 650 if (scheme.empty() || host.empty()) { 651 scheme = source_url_.scheme(); 652 host = source_url_.host(); 653 } 654 655 std::string form_string = scheme + "://" + host + "&" + 656 UTF16ToUTF8(form_name_) + 657 form_signature_field_names_; 658 659 return Hash64Bit(form_string); 660} 661 662bool FormStructure::ShouldSkipField(const FormFieldData& field) const { 663 return field.is_checkable; 664} 665 666bool FormStructure::IsAutofillable(bool require_method_post) const { 667 if (autofill_count() < kRequiredAutofillFields) 668 return false; 669 670 return ShouldBeParsed(require_method_post); 671} 672 673void FormStructure::UpdateAutofillCount() { 674 autofill_count_ = 0; 675 for (std::vector<AutofillField*>::const_iterator iter = begin(); 676 iter != end(); ++iter) { 677 AutofillField* field = *iter; 678 if (field && field->IsFieldFillable()) 679 ++autofill_count_; 680 } 681} 682 683bool FormStructure::ShouldBeParsed(bool require_method_post) const { 684 if (active_field_count() < kRequiredAutofillFields) 685 return false; 686 687 // Rule out http(s)://*/search?... 688 // e.g. http://www.google.com/search?q=... 689 // http://search.yahoo.com/search?p=... 690 if (target_url_.path() == "/search") 691 return false; 692 693 bool has_text_field = false; 694 for (std::vector<AutofillField*>::const_iterator it = begin(); 695 it != end() && !has_text_field; ++it) { 696 has_text_field |= (*it)->form_control_type != "select-one"; 697 } 698 if (!has_text_field) 699 return false; 700 701 return !require_method_post || (method_ == POST); 702} 703 704bool FormStructure::ShouldBeCrowdsourced() const { 705 return !has_author_specified_types_ && ShouldBeParsed(true); 706} 707 708void FormStructure::UpdateFromCache(const FormStructure& cached_form) { 709 // Map from field signatures to cached fields. 710 std::map<std::string, const AutofillField*> cached_fields; 711 for (size_t i = 0; i < cached_form.field_count(); ++i) { 712 const AutofillField* field = cached_form.field(i); 713 cached_fields[field->FieldSignature()] = field; 714 } 715 716 for (std::vector<AutofillField*>::const_iterator iter = begin(); 717 iter != end(); ++iter) { 718 AutofillField* field = *iter; 719 720 std::map<std::string, const AutofillField*>::const_iterator 721 cached_field = cached_fields.find(field->FieldSignature()); 722 if (cached_field != cached_fields.end()) { 723 if (field->form_control_type != "select-one" && 724 field->value == cached_field->second->value) { 725 // From the perspective of learning user data, text fields containing 726 // default values are equivalent to empty fields. 727 field->value = base::string16(); 728 } 729 730 field->set_heuristic_type(cached_field->second->heuristic_type()); 731 field->set_server_type(cached_field->second->server_type()); 732 } 733 } 734 735 UpdateAutofillCount(); 736 737 server_experiment_id_ = cached_form.server_experiment_id(); 738 739 // The form signature should match between query and upload requests to the 740 // server. On many websites, form elements are dynamically added, removed, or 741 // rearranged via JavaScript between page load and form submission, so we 742 // copy over the |form_signature_field_names_| corresponding to the query 743 // request. 744 DCHECK_EQ(cached_form.form_name_, form_name_); 745 DCHECK_EQ(cached_form.source_url_, source_url_); 746 DCHECK_EQ(cached_form.target_url_, target_url_); 747 form_signature_field_names_ = cached_form.form_signature_field_names_; 748} 749 750void FormStructure::LogQualityMetrics( 751 const AutofillMetrics& metric_logger, 752 const base::TimeTicks& load_time, 753 const base::TimeTicks& interaction_time, 754 const base::TimeTicks& submission_time) const { 755 std::string experiment_id = server_experiment_id(); 756 metric_logger.LogServerExperimentIdForUpload(experiment_id); 757 758 size_t num_detected_field_types = 0; 759 bool did_autofill_all_possible_fields = true; 760 bool did_autofill_some_possible_fields = false; 761 for (size_t i = 0; i < field_count(); ++i) { 762 const AutofillField* field = this->field(i); 763 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_SUBMITTED, 764 experiment_id); 765 766 // No further logging for empty fields nor for fields where the entered data 767 // does not appear to already exist in the user's stored Autofill data. 768 const ServerFieldTypeSet& field_types = field->possible_types(); 769 DCHECK(!field_types.empty()); 770 if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE)) 771 continue; 772 773 ++num_detected_field_types; 774 if (field->is_autofilled) 775 did_autofill_some_possible_fields = true; 776 else 777 did_autofill_all_possible_fields = false; 778 779 // Collapse field types that Chrome treats as identical, e.g. home and 780 // billing address fields. 781 ServerFieldTypeSet collapsed_field_types; 782 for (ServerFieldTypeSet::const_iterator it = field_types.begin(); 783 it != field_types.end(); 784 ++it) { 785 // Since we currently only support US phone numbers, the (city code + main 786 // digits) number is almost always identical to the whole phone number. 787 // TODO(isherman): Improve this logic once we add support for 788 // international numbers. 789 if (*it == PHONE_HOME_CITY_AND_NUMBER) 790 collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER); 791 else 792 collapsed_field_types.insert(AutofillType(*it).GetStorableType()); 793 } 794 795 // Capture the field's type, if it is unambiguous. 796 ServerFieldType field_type = UNKNOWN_TYPE; 797 if (collapsed_field_types.size() == 1) 798 field_type = *collapsed_field_types.begin(); 799 800 ServerFieldType heuristic_type = 801 AutofillType(field->heuristic_type()).GetStorableType(); 802 ServerFieldType server_type = 803 AutofillType(field->server_type()).GetStorableType(); 804 ServerFieldType predicted_type = field->Type().GetStorableType(); 805 806 // Log heuristic, server, and overall type quality metrics, independently of 807 // whether the field was autofilled. 808 if (heuristic_type == UNKNOWN_TYPE) { 809 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 810 field_type, experiment_id); 811 } else if (field_types.count(heuristic_type)) { 812 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH, 813 field_type, experiment_id); 814 } else { 815 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH, 816 field_type, experiment_id); 817 } 818 819 if (server_type == NO_SERVER_DATA) { 820 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 821 field_type, experiment_id); 822 } else if (field_types.count(server_type)) { 823 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH, 824 field_type, experiment_id); 825 } else { 826 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH, 827 field_type, experiment_id); 828 } 829 830 if (predicted_type == UNKNOWN_TYPE) { 831 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 832 field_type, experiment_id); 833 } else if (field_types.count(predicted_type)) { 834 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH, 835 field_type, experiment_id); 836 } else { 837 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH, 838 field_type, experiment_id); 839 } 840 841 // TODO(isherman): <select> fields don't support |is_autofilled()|, so we 842 // have to skip them for the remaining metrics. 843 if (field->form_control_type == "select-one") 844 continue; 845 846 if (field->is_autofilled) { 847 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_AUTOFILLED, 848 experiment_id); 849 } else { 850 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_NOT_AUTOFILLED, 851 experiment_id); 852 853 if (heuristic_type == UNKNOWN_TYPE) { 854 metric_logger.LogQualityMetric( 855 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN, 856 experiment_id); 857 } else if (field_types.count(heuristic_type)) { 858 metric_logger.LogQualityMetric( 859 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH, 860 experiment_id); 861 } else { 862 metric_logger.LogQualityMetric( 863 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH, 864 experiment_id); 865 } 866 867 if (server_type == NO_SERVER_DATA) { 868 metric_logger.LogQualityMetric( 869 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN, 870 experiment_id); 871 } else if (field_types.count(server_type)) { 872 metric_logger.LogQualityMetric( 873 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH, 874 experiment_id); 875 } else { 876 metric_logger.LogQualityMetric( 877 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH, 878 experiment_id); 879 } 880 } 881 } 882 883 if (num_detected_field_types < kRequiredAutofillFields) { 884 metric_logger.LogUserHappinessMetric( 885 AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM); 886 } else { 887 if (did_autofill_all_possible_fields) { 888 metric_logger.LogUserHappinessMetric( 889 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL); 890 } else if (did_autofill_some_possible_fields) { 891 metric_logger.LogUserHappinessMetric( 892 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME); 893 } else { 894 metric_logger.LogUserHappinessMetric( 895 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE); 896 } 897 898 // Unlike the other times, the |submission_time| should always be available. 899 DCHECK(!submission_time.is_null()); 900 901 // The |load_time| might be unset, in the case that the form was dynamically 902 // added to the DOM. 903 if (!load_time.is_null()) { 904 // Submission should always chronologically follow form load. 905 DCHECK(submission_time > load_time); 906 base::TimeDelta elapsed = submission_time - load_time; 907 if (did_autofill_some_possible_fields) 908 metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed); 909 else 910 metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed); 911 } 912 913 // The |interaction_time| might be unset, in the case that the user 914 // submitted a blank form. 915 if (!interaction_time.is_null()) { 916 // Submission should always chronologically follow interaction. 917 DCHECK(submission_time > interaction_time); 918 base::TimeDelta elapsed = submission_time - interaction_time; 919 if (did_autofill_some_possible_fields) { 920 metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed); 921 } else { 922 metric_logger.LogFormFillDurationFromInteractionWithoutAutofill( 923 elapsed); 924 } 925 } 926 } 927} 928 929const AutofillField* FormStructure::field(size_t index) const { 930 if (index >= fields_.size()) { 931 NOTREACHED(); 932 return NULL; 933 } 934 935 return fields_[index]; 936} 937 938AutofillField* FormStructure::field(size_t index) { 939 return const_cast<AutofillField*>( 940 static_cast<const FormStructure*>(this)->field(index)); 941} 942 943size_t FormStructure::field_count() const { 944 return fields_.size(); 945} 946 947size_t FormStructure::active_field_count() const { 948 return active_field_count_; 949} 950 951std::string FormStructure::server_experiment_id() const { 952 return server_experiment_id_; 953} 954 955FormData FormStructure::ToFormData() const { 956 // |data.user_submitted| will always be false. 957 FormData data; 958 data.name = form_name_; 959 data.origin = source_url_; 960 data.action = target_url_; 961 data.method = ASCIIToUTF16(method_ == POST ? "POST" : "GET"); 962 963 for (size_t i = 0; i < fields_.size(); ++i) { 964 data.fields.push_back(FormFieldData(*fields_[i])); 965 } 966 967 return data; 968} 969 970bool FormStructure::operator==(const FormData& form) const { 971 // TODO(jhawkins): Is this enough to differentiate a form? 972 if (form_name_ == form.name && 973 source_url_ == form.origin && 974 target_url_ == form.action) { 975 return true; 976 } 977 978 // TODO(jhawkins): Compare field names, IDs and labels once we have labels 979 // set up. 980 981 return false; 982} 983 984bool FormStructure::operator!=(const FormData& form) const { 985 return !operator==(form); 986} 987 988std::string FormStructure::Hash64Bit(const std::string& str) { 989 std::string hash_bin = base::SHA1HashString(str); 990 DCHECK_EQ(20U, hash_bin.length()); 991 992 uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) | 993 (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) | 994 (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) | 995 (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) | 996 (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) | 997 (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) | 998 (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) | 999 ((static_cast<uint64>(hash_bin[7])) & 0xFF); 1000 1001 return base::Uint64ToString(hash64); 1002} 1003 1004bool FormStructure::EncodeFormRequest( 1005 FormStructure::EncodeRequestType request_type, 1006 buzz::XmlElement* encompassing_xml_element) const { 1007 if (!field_count()) // Nothing to add. 1008 return false; 1009 1010 // Some badly formatted web sites repeat fields - limit number of fields to 1011 // 48, which is far larger than any valid form and XML still fits into 2K. 1012 // Do not send requests for forms with more than this many fields, as they are 1013 // near certainly not valid/auto-fillable. 1014 const size_t kMaxFieldsOnTheForm = 48; 1015 if (field_count() > kMaxFieldsOnTheForm) 1016 return false; 1017 1018 // Add the child nodes for the form fields. 1019 for (size_t index = 0; index < field_count(); ++index) { 1020 const AutofillField* field = fields_[index]; 1021 switch (request_type) { 1022 case FormStructure::UPLOAD: 1023 EncodeFieldForUpload(*field, encompassing_xml_element); 1024 break; 1025 case FormStructure::QUERY: 1026 if (ShouldSkipField(*field)) 1027 continue; 1028 EncodeFieldForQuery(*field, encompassing_xml_element); 1029 break; 1030 case FormStructure::FIELD_ASSIGNMENTS: 1031 EncodeFieldForFieldAssignments(*field, encompassing_xml_element); 1032 break; 1033 } 1034 } 1035 return true; 1036} 1037 1038void FormStructure::ParseFieldTypesFromAutocompleteAttributes( 1039 bool* found_types, 1040 bool* found_sections) { 1041 const std::string kDefaultSection = "-default"; 1042 1043 *found_types = false; 1044 *found_sections = false; 1045 for (std::vector<AutofillField*>::iterator it = fields_.begin(); 1046 it != fields_.end(); ++it) { 1047 AutofillField* field = *it; 1048 1049 // To prevent potential section name collisions, add a default suffix for 1050 // other fields. Without this, 'autocomplete' attribute values 1051 // "section--shipping street-address" and "shipping street-address" would be 1052 // parsed identically, given the section handling code below. We do this 1053 // before any validation so that fields with invalid attributes still end up 1054 // in the default section. These default section names will be overridden 1055 // by subsequent heuristic parsing steps if there are no author-specified 1056 // section names. 1057 field->set_section(kDefaultSection); 1058 1059 // Canonicalize the attribute value by trimming whitespace, collapsing 1060 // non-space characters (e.g. tab) to spaces, and converting to lowercase. 1061 std::string autocomplete_attribute = 1062 CollapseWhitespaceASCII(field->autocomplete_attribute, false); 1063 autocomplete_attribute = StringToLowerASCII(autocomplete_attribute); 1064 1065 // The autocomplete attribute is overloaded: it can specify either a field 1066 // type hint or whether autocomplete should be enabled at all. Ignore the 1067 // latter type of attribute value. 1068 if (autocomplete_attribute.empty() || 1069 autocomplete_attribute == "on" || 1070 autocomplete_attribute == "off") { 1071 continue; 1072 } 1073 1074 // Any other value, even it is invalid, is considered to be a type hint. 1075 // This allows a website's author to specify an attribute like 1076 // autocomplete="other" on a field to disable all Autofill heuristics for 1077 // the form. 1078 *found_types = true; 1079 1080 // Tokenize the attribute value. Per the spec, the tokens are parsed in 1081 // reverse order. 1082 std::vector<std::string> tokens; 1083 Tokenize(autocomplete_attribute, " ", &tokens); 1084 1085 // The final token must be the field type. 1086 // If it is not one of the known types, abort. 1087 DCHECK(!tokens.empty()); 1088 std::string field_type_token = tokens.back(); 1089 tokens.pop_back(); 1090 HtmlFieldType field_type = 1091 FieldTypeFromAutocompleteAttributeValue(field_type_token, *field); 1092 if (field_type == HTML_TYPE_UNKNOWN) 1093 continue; 1094 1095 // The preceding token, if any, may be a type hint. 1096 if (!tokens.empty() && IsContactTypeHint(tokens.back())) { 1097 // If it is, it must match the field type; otherwise, abort. 1098 // Note that an invalid token invalidates the entire attribute value, even 1099 // if the other tokens are valid. 1100 if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type)) 1101 continue; 1102 1103 // Chrome Autofill ignores these type hints. 1104 tokens.pop_back(); 1105 } 1106 1107 // The preceding token, if any, may be a fixed string that is either 1108 // "shipping" or "billing". Chrome Autofill treats these as implicit 1109 // section name suffixes. 1110 DCHECK_EQ(kDefaultSection, field->section()); 1111 std::string section = field->section(); 1112 HtmlFieldMode mode = HTML_MODE_NONE; 1113 if (!tokens.empty()) { 1114 if (tokens.back() == kShippingMode) 1115 mode = HTML_MODE_SHIPPING; 1116 else if (tokens.back() == kBillingMode) 1117 mode = HTML_MODE_BILLING; 1118 } 1119 1120 if (mode != HTML_MODE_NONE) { 1121 section = "-" + tokens.back(); 1122 tokens.pop_back(); 1123 } 1124 1125 // The preceding token, if any, may be a named section. 1126 const std::string kSectionPrefix = "section-"; 1127 if (!tokens.empty() && 1128 StartsWithASCII(tokens.back(), kSectionPrefix, true)) { 1129 // Prepend this section name to the suffix set in the preceding block. 1130 section = tokens.back().substr(kSectionPrefix.size()) + section; 1131 tokens.pop_back(); 1132 } 1133 1134 // No other tokens are allowed. If there are any remaining, abort. 1135 if (!tokens.empty()) 1136 continue; 1137 1138 if (section != kDefaultSection) { 1139 *found_sections = true; 1140 field->set_section(section); 1141 } 1142 1143 // No errors encountered while parsing! 1144 // Update the |field|'s type based on what was parsed from the attribute. 1145 field->SetHtmlType(field_type, mode); 1146 } 1147} 1148 1149void FormStructure::IdentifySections(bool has_author_specified_sections) { 1150 if (fields_.empty()) 1151 return; 1152 1153 if (!has_author_specified_sections) { 1154 // Name sections after the first field in the section. 1155 base::string16 current_section = fields_.front()->unique_name(); 1156 1157 // Keep track of the types we've seen in this section. 1158 std::set<ServerFieldType> seen_types; 1159 ServerFieldType previous_type = UNKNOWN_TYPE; 1160 1161 for (std::vector<AutofillField*>::iterator field = fields_.begin(); 1162 field != fields_.end(); ++field) { 1163 const ServerFieldType current_type = (*field)->Type().GetStorableType(); 1164 1165 bool already_saw_current_type = seen_types.count(current_type) > 0; 1166 1167 // Forms often ask for multiple phone numbers -- e.g. both a daytime and 1168 // evening phone number. Our phone number detection is also generally a 1169 // little off. Hence, ignore this field type as a signal here. 1170 if (AutofillType(current_type).group() == PHONE_HOME) 1171 already_saw_current_type = false; 1172 1173 // Some forms have adjacent fields of the same type. Two common examples: 1174 // * Forms with two email fields, where the second is meant to "confirm" 1175 // the first. 1176 // * Forms with a <select> menu for states in some countries, and a 1177 // freeform <input> field for states in other countries. (Usually, 1178 // only one of these two will be visible for any given choice of 1179 // country.) 1180 // Generally, adjacent fields of the same type belong in the same logical 1181 // section. 1182 if (current_type == previous_type) 1183 already_saw_current_type = false; 1184 1185 previous_type = current_type; 1186 1187 if (current_type != UNKNOWN_TYPE && already_saw_current_type) { 1188 // We reached the end of a section, so start a new section. 1189 seen_types.clear(); 1190 current_section = (*field)->unique_name(); 1191 } 1192 1193 seen_types.insert(current_type); 1194 (*field)->set_section(UTF16ToUTF8(current_section)); 1195 } 1196 } 1197 1198 // Ensure that credit card and address fields are in separate sections. 1199 // This simplifies the section-aware logic in autofill_manager.cc. 1200 for (std::vector<AutofillField*>::iterator field = fields_.begin(); 1201 field != fields_.end(); ++field) { 1202 FieldTypeGroup field_type_group = (*field)->Type().group(); 1203 if (field_type_group == CREDIT_CARD) 1204 (*field)->set_section((*field)->section() + "-cc"); 1205 else 1206 (*field)->set_section((*field)->section() + "-default"); 1207 } 1208} 1209 1210} // namespace autofill 1211