form_structure.cc revision 1e9bf3e0803691d0a228da41fc608347b6db4340
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/autofill/core/browser/form_structure.h" 6 7#include <utility> 8 9#include "base/basictypes.h" 10#include "base/command_line.h" 11#include "base/logging.h" 12#include "base/memory/scoped_ptr.h" 13#include "base/sha1.h" 14#include "base/strings/string_number_conversions.h" 15#include "base/strings/string_util.h" 16#include "base/strings/stringprintf.h" 17#include "base/strings/utf_string_conversions.h" 18#include "base/time/time.h" 19#include "components/autofill/core/browser/autofill_metrics.h" 20#include "components/autofill/core/browser/autofill_type.h" 21#include "components/autofill/core/browser/autofill_xml_parser.h" 22#include "components/autofill/core/browser/field_types.h" 23#include "components/autofill/core/browser/form_field.h" 24#include "components/autofill/core/common/autofill_constants.h" 25#include "components/autofill/core/common/form_data.h" 26#include "components/autofill/core/common/form_data_predictions.h" 27#include "components/autofill/core/common/form_field_data.h" 28#include "components/autofill/core/common/form_field_data_predictions.h" 29#include "third_party/icu/source/i18n/unicode/regex.h" 30#include "third_party/libjingle/source/talk/xmllite/xmlelement.h" 31 32namespace autofill { 33namespace { 34 35const char kFormMethodPost[] = "post"; 36 37// XML elements and attributes. 38const char kAttributeAcceptedFeatures[] = "accepts"; 39const char kAttributeAutofillUsed[] = "autofillused"; 40const char kAttributeAutofillType[] = "autofilltype"; 41const char kAttributeClientVersion[] = "clientversion"; 42const char kAttributeDataPresent[] = "datapresent"; 43const char kAttributeFieldID[] = "fieldid"; 44const char kAttributeFieldType[] = "fieldtype"; 45const char kAttributeFormSignature[] = "formsignature"; 46const char kAttributeName[] = "name"; 47const char kAttributeSignature[] = "signature"; 48const char kAcceptedFeaturesExperiment[] = "e"; // e=experiments 49const char kClientVersion[] = "6.1.1715.1442/en (GGLL)"; 50const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; 51const char kXMLElementAutofillQuery[] = "autofillquery"; 52const char kXMLElementAutofillUpload[] = "autofillupload"; 53const char kXMLElementFieldAssignments[] = "fieldassignments"; 54const char kXMLElementField[] = "field"; 55const char kXMLElementFields[] = "fields"; 56const char kXMLElementForm[] = "form"; 57const char kBillingMode[] = "billing"; 58const char kShippingMode[] = "shipping"; 59 60// Stip away >= 5 consecutive digits. 61const char kIgnorePatternInFieldName[] = "\\d{5,}+"; 62 63// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to 64// |available_field_types| and returns the hex representation as a string. 65std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) { 66 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte, 67 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field. 68 const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8; 69 70 // Pack the types in |available_field_types| into |bit_field|. 71 std::vector<uint8> bit_field(kNumBytes, 0); 72 for (ServerFieldTypeSet::const_iterator field_type = 73 available_field_types.begin(); 74 field_type != available_field_types.end(); 75 ++field_type) { 76 // Set the appropriate bit in the field. The bit we set is the one 77 // |field_type| % 8 from the left of the byte. 78 const size_t byte = *field_type / 8; 79 const size_t bit = 0x80 >> (*field_type % 8); 80 DCHECK(byte < bit_field.size()); 81 bit_field[byte] |= bit; 82 } 83 84 // Discard any trailing zeroes. 85 // If there are no available types, we return the empty string. 86 size_t data_end = bit_field.size(); 87 for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) { 88 } 89 90 // Print all meaningfull bytes into a string. 91 std::string data_presence; 92 data_presence.reserve(data_end * 2 + 1); 93 for (size_t i = 0; i < data_end; ++i) { 94 base::StringAppendF(&data_presence, "%02x", bit_field[i]); 95 } 96 97 return data_presence; 98} 99 100// Helper for |EncodeFormRequest()| that creates XmlElements for the given field 101// in upload xml, and also add them to the parent XmlElement. 102void EncodeFieldForUpload(const AutofillField& field, 103 buzz::XmlElement* parent) { 104 // Don't upload checkable fields. 105 if (field.is_checkable) 106 return; 107 108 ServerFieldTypeSet types = field.possible_types(); 109 // |types| could be empty in unit-tests only. 110 for (ServerFieldTypeSet::iterator field_type = types.begin(); 111 field_type != types.end(); ++field_type) { 112 buzz::XmlElement *field_element = new buzz::XmlElement( 113 buzz::QName(kXMLElementField)); 114 115 field_element->SetAttr(buzz::QName(kAttributeSignature), 116 field.FieldSignature()); 117 field_element->SetAttr(buzz::QName(kAttributeAutofillType), 118 base::IntToString(*field_type)); 119 parent->AddElement(field_element); 120 } 121} 122 123// Helper for |EncodeFormRequest()| that creates XmlElement for the given field 124// in query xml, and also add it to the parent XmlElement. 125void EncodeFieldForQuery(const AutofillField& field, 126 buzz::XmlElement* parent) { 127 buzz::XmlElement *field_element = new buzz::XmlElement( 128 buzz::QName(kXMLElementField)); 129 field_element->SetAttr(buzz::QName(kAttributeSignature), 130 field.FieldSignature()); 131 parent->AddElement(field_element); 132} 133 134// Helper for |EncodeFormRequest()| that creates XmlElements for the given field 135// in field assignments xml, and also add them to the parent XmlElement. 136void EncodeFieldForFieldAssignments(const AutofillField& field, 137 buzz::XmlElement* parent) { 138 ServerFieldTypeSet types = field.possible_types(); 139 for (ServerFieldTypeSet::iterator field_type = types.begin(); 140 field_type != types.end(); ++field_type) { 141 buzz::XmlElement *field_element = new buzz::XmlElement( 142 buzz::QName(kXMLElementFields)); 143 144 field_element->SetAttr(buzz::QName(kAttributeFieldID), 145 field.FieldSignature()); 146 field_element->SetAttr(buzz::QName(kAttributeFieldType), 147 base::IntToString(*field_type)); 148 field_element->SetAttr(buzz::QName(kAttributeName), 149 UTF16ToUTF8(field.name)); 150 parent->AddElement(field_element); 151 } 152} 153 154// Returns |true| iff the |token| is a type hint for a contact field, as 155// specified in the implementation section of http://is.gd/whatwg_autocomplete 156// Note that "fax" and "pager" are intentionally ignored, as Chrome does not 157// support filling either type of information. 158bool IsContactTypeHint(const std::string& token) { 159 return token == "home" || token == "work" || token == "mobile"; 160} 161 162// Returns |true| iff the |token| is a type hint appropriate for a field of the 163// given |field_type|, as specified in the implementation section of 164// http://is.gd/whatwg_autocomplete 165bool ContactTypeHintMatchesFieldType(const std::string& token, 166 HtmlFieldType field_type) { 167 // The "home" and "work" type hints are only appropriate for email and phone 168 // number field types. 169 if (token == "home" || token == "work") { 170 return field_type == HTML_TYPE_EMAIL || 171 (field_type >= HTML_TYPE_TEL && 172 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX); 173 } 174 175 // The "mobile" type hint is only appropriate for phone number field types. 176 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not 177 // support filling either type of information. 178 if (token == "mobile") { 179 return field_type >= HTML_TYPE_TEL && 180 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX; 181 } 182 183 return false; 184} 185 186// Returns the Chrome Autofill-supported field type corresponding to the given 187// |autocomplete_attribute_value|, if there is one, in the context of the given 188// |field|. Chrome Autofill supports a subset of the field types listed at 189// http://is.gd/whatwg_autocomplete 190HtmlFieldType FieldTypeFromAutocompleteAttributeValue( 191 const std::string& autocomplete_attribute_value, 192 const AutofillField& field) { 193 if (autocomplete_attribute_value == "name") 194 return HTML_TYPE_NAME; 195 196 if (autocomplete_attribute_value == "given-name") 197 return HTML_TYPE_GIVEN_NAME; 198 199 if (autocomplete_attribute_value == "additional-name") { 200 if (field.max_length == 1) 201 return HTML_TYPE_ADDITIONAL_NAME_INITIAL; 202 else 203 return HTML_TYPE_ADDITIONAL_NAME; 204 } 205 206 if (autocomplete_attribute_value == "family-name") 207 return HTML_TYPE_FAMILY_NAME; 208 209 if (autocomplete_attribute_value == "organization") 210 return HTML_TYPE_ORGANIZATION; 211 212 if (autocomplete_attribute_value == "street-address") 213 return HTML_TYPE_STREET_ADDRESS; 214 215 if (autocomplete_attribute_value == "address-line1") 216 return HTML_TYPE_ADDRESS_LINE1; 217 218 if (autocomplete_attribute_value == "address-line2") 219 return HTML_TYPE_ADDRESS_LINE2; 220 221 if (autocomplete_attribute_value == "locality") 222 return HTML_TYPE_LOCALITY; 223 224 if (autocomplete_attribute_value == "region") 225 return HTML_TYPE_REGION; 226 227 if (autocomplete_attribute_value == "country") 228 return HTML_TYPE_COUNTRY_CODE; 229 230 if (autocomplete_attribute_value == "country-name") 231 return HTML_TYPE_COUNTRY_NAME; 232 233 if (autocomplete_attribute_value == "postal-code") 234 return HTML_TYPE_POSTAL_CODE; 235 236 if (autocomplete_attribute_value == "cc-name") 237 return HTML_TYPE_CREDIT_CARD_NAME; 238 239 if (autocomplete_attribute_value == "cc-number") 240 return HTML_TYPE_CREDIT_CARD_NUMBER; 241 242 if (autocomplete_attribute_value == "cc-exp") { 243 if (field.max_length == 5) 244 return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR; 245 else if (field.max_length == 7) 246 return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR; 247 else 248 return HTML_TYPE_CREDIT_CARD_EXP; 249 } 250 251 if (autocomplete_attribute_value == "cc-exp-month") 252 return HTML_TYPE_CREDIT_CARD_EXP_MONTH; 253 254 if (autocomplete_attribute_value == "cc-exp-year") { 255 if (field.max_length == 2) 256 return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR; 257 else if (field.max_length == 4) 258 return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR; 259 else 260 return HTML_TYPE_CREDIT_CARD_EXP_YEAR; 261 } 262 263 if (autocomplete_attribute_value == "cc-csc") 264 return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE; 265 266 if (autocomplete_attribute_value == "cc-type") 267 return HTML_TYPE_CREDIT_CARD_TYPE; 268 269 if (autocomplete_attribute_value == "tel") 270 return HTML_TYPE_TEL; 271 272 if (autocomplete_attribute_value == "tel-country-code") 273 return HTML_TYPE_TEL_COUNTRY_CODE; 274 275 if (autocomplete_attribute_value == "tel-national") 276 return HTML_TYPE_TEL_NATIONAL; 277 278 if (autocomplete_attribute_value == "tel-area-code") 279 return HTML_TYPE_TEL_AREA_CODE; 280 281 if (autocomplete_attribute_value == "tel-local") 282 return HTML_TYPE_TEL_LOCAL; 283 284 if (autocomplete_attribute_value == "tel-local-prefix") 285 return HTML_TYPE_TEL_LOCAL_PREFIX; 286 287 if (autocomplete_attribute_value == "tel-local-suffix") 288 return HTML_TYPE_TEL_LOCAL_SUFFIX; 289 290 if (autocomplete_attribute_value == "email") 291 return HTML_TYPE_EMAIL; 292 293 return HTML_TYPE_UNKNOWN; 294} 295 296std::string StripDigitsIfRequired(const base::string16& input) { 297 UErrorCode status = U_ZERO_ERROR; 298 CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern, 299 (kIgnorePatternInFieldName)); 300 CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher, 301 (icu_pattern, UREGEX_CASE_INSENSITIVE, status)); 302 DCHECK_EQ(status, U_ZERO_ERROR); 303 304 icu::UnicodeString icu_input(input.data(), input.length()); 305 matcher.reset(icu_input); 306 307 icu::UnicodeString replaced_string = matcher.replaceAll("", status); 308 309 std::string return_string; 310 status = U_ZERO_ERROR; 311 UTF16ToUTF8(replaced_string.getBuffer(), 312 static_cast<size_t>(replaced_string.length()), 313 &return_string); 314 if (status != U_ZERO_ERROR) { 315 DVLOG(1) << "Couldn't strip digits in " << UTF16ToUTF8(input); 316 return UTF16ToUTF8(input); 317 } 318 319 return return_string; 320} 321 322} // namespace 323 324FormStructure::FormStructure(const FormData& form) 325 : form_name_(form.name), 326 source_url_(form.origin), 327 target_url_(form.action), 328 autofill_count_(0), 329 active_field_count_(0), 330 upload_required_(USE_UPLOAD_RATES), 331 server_experiment_id_("no server response"), 332 has_author_specified_types_(false) { 333 // Copy the form fields. 334 std::map<base::string16, size_t> unique_names; 335 for (std::vector<FormFieldData>::const_iterator field = 336 form.fields.begin(); 337 field != form.fields.end(); ++field) { 338 if (!ShouldSkipField(*field)) { 339 // Add all supported form fields (including with empty names) to the 340 // signature. This is a requirement for Autofill servers. 341 form_signature_field_names_.append("&"); 342 form_signature_field_names_.append(StripDigitsIfRequired(field->name)); 343 344 ++active_field_count_; 345 } 346 347 // Generate a unique name for this field by appending a counter to the name. 348 // Make sure to prepend the counter with a non-numeric digit so that we are 349 // guaranteed to avoid collisions. 350 if (!unique_names.count(field->name)) 351 unique_names[field->name] = 1; 352 else 353 ++unique_names[field->name]; 354 base::string16 unique_name = field->name + ASCIIToUTF16("_") + 355 base::IntToString16(unique_names[field->name]); 356 fields_.push_back(new AutofillField(*field, unique_name)); 357 } 358 359 std::string method = UTF16ToUTF8(form.method); 360 if (StringToLowerASCII(method) == kFormMethodPost) { 361 method_ = POST; 362 } else { 363 // Either the method is 'get', or we don't know. In this case we default 364 // to GET. 365 method_ = GET; 366 } 367} 368 369FormStructure::~FormStructure() {} 370 371void FormStructure::DetermineHeuristicTypes( 372 const AutofillMetrics& metric_logger) { 373 // First, try to detect field types based on each field's |autocomplete| 374 // attribute value. If there is at least one form field that specifies an 375 // autocomplete type hint, don't try to apply other heuristics to match fields 376 // in this form. 377 bool has_author_specified_sections; 378 ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_, 379 &has_author_specified_sections); 380 381 if (!has_author_specified_types_) { 382 ServerFieldTypeMap field_type_map; 383 FormField::ParseFormFields(fields_.get(), &field_type_map); 384 for (size_t i = 0; i < field_count(); ++i) { 385 AutofillField* field = fields_[i]; 386 ServerFieldTypeMap::iterator iter = 387 field_type_map.find(field->unique_name()); 388 if (iter != field_type_map.end()) 389 field->set_heuristic_type(iter->second); 390 } 391 } 392 393 UpdateAutofillCount(); 394 IdentifySections(has_author_specified_sections); 395 396 if (IsAutofillable(true)) { 397 metric_logger.LogDeveloperEngagementMetric( 398 AutofillMetrics::FILLABLE_FORM_PARSED); 399 if (has_author_specified_types_) { 400 metric_logger.LogDeveloperEngagementMetric( 401 AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS); 402 } 403 } 404} 405 406bool FormStructure::EncodeUploadRequest( 407 const ServerFieldTypeSet& available_field_types, 408 bool form_was_autofilled, 409 std::string* encoded_xml) const { 410 DCHECK(ShouldBeCrowdsourced()); 411 412 // Verify that |available_field_types| agrees with the possible field types we 413 // are uploading. 414 for (std::vector<AutofillField*>::const_iterator field = begin(); 415 field != end(); 416 ++field) { 417 for (ServerFieldTypeSet::const_iterator type = 418 (*field)->possible_types().begin(); 419 type != (*field)->possible_types().end(); 420 ++type) { 421 DCHECK(*type == UNKNOWN_TYPE || 422 *type == EMPTY_TYPE || 423 available_field_types.count(*type)); 424 } 425 } 426 427 // Set up the <autofillupload> element and its attributes. 428 buzz::XmlElement autofill_request_xml( 429 (buzz::QName(kXMLElementAutofillUpload))); 430 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion), 431 kClientVersion); 432 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature), 433 FormSignature()); 434 autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed), 435 form_was_autofilled ? "true" : "false"); 436 autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent), 437 EncodeFieldTypes(available_field_types).c_str()); 438 439 if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml)) 440 return false; // Malformed form, skip it. 441 442 // Obtain the XML structure as a string. 443 *encoded_xml = kXMLDeclaration; 444 *encoded_xml += autofill_request_xml.Str().c_str(); 445 446 // To enable this logging, run with the flag --vmodule="form_structure=2". 447 VLOG(2) << "\n" << *encoded_xml; 448 449 return true; 450} 451 452bool FormStructure::EncodeFieldAssignments( 453 const ServerFieldTypeSet& available_field_types, 454 std::string* encoded_xml) const { 455 DCHECK(ShouldBeCrowdsourced()); 456 457 // Set up the <fieldassignments> element and its attributes. 458 buzz::XmlElement autofill_request_xml( 459 (buzz::QName(kXMLElementFieldAssignments))); 460 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature), 461 FormSignature()); 462 463 if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS, 464 &autofill_request_xml)) 465 return false; // Malformed form, skip it. 466 467 // Obtain the XML structure as a string. 468 *encoded_xml = kXMLDeclaration; 469 *encoded_xml += autofill_request_xml.Str().c_str(); 470 471 return true; 472} 473 474// static 475bool FormStructure::EncodeQueryRequest( 476 const std::vector<FormStructure*>& forms, 477 std::vector<std::string>* encoded_signatures, 478 std::string* encoded_xml) { 479 DCHECK(encoded_signatures); 480 DCHECK(encoded_xml); 481 encoded_xml->clear(); 482 encoded_signatures->clear(); 483 encoded_signatures->reserve(forms.size()); 484 485 // Set up the <autofillquery> element and attributes. 486 buzz::XmlElement autofill_request_xml( 487 (buzz::QName(kXMLElementAutofillQuery))); 488 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion), 489 kClientVersion); 490 491 // Some badly formatted web sites repeat forms - detect that and encode only 492 // one form as returned data would be the same for all the repeated forms. 493 std::set<std::string> processed_forms; 494 for (ScopedVector<FormStructure>::const_iterator it = forms.begin(); 495 it != forms.end(); 496 ++it) { 497 std::string signature((*it)->FormSignature()); 498 if (processed_forms.find(signature) != processed_forms.end()) 499 continue; 500 processed_forms.insert(signature); 501 scoped_ptr<buzz::XmlElement> encompassing_xml_element( 502 new buzz::XmlElement(buzz::QName(kXMLElementForm))); 503 encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature), 504 signature); 505 506 if (!(*it)->EncodeFormRequest(FormStructure::QUERY, 507 encompassing_xml_element.get())) 508 continue; // Malformed form, skip it. 509 510 autofill_request_xml.AddElement(encompassing_xml_element.release()); 511 encoded_signatures->push_back(signature); 512 } 513 514 if (!encoded_signatures->size()) 515 return false; 516 517 autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures), 518 kAcceptedFeaturesExperiment); 519 520 // Obtain the XML structure as a string. 521 *encoded_xml = kXMLDeclaration; 522 *encoded_xml += autofill_request_xml.Str().c_str(); 523 524 return true; 525} 526 527// static 528void FormStructure::ParseQueryResponse( 529 const std::string& response_xml, 530 const std::vector<FormStructure*>& forms, 531 const AutofillMetrics& metric_logger) { 532 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED); 533 534 // Parse the field types from the server response to the query. 535 std::vector<AutofillServerFieldInfo> field_infos; 536 UploadRequired upload_required; 537 std::string experiment_id; 538 AutofillQueryXmlParser parse_handler(&field_infos, 539 &upload_required, 540 &experiment_id); 541 buzz::XmlParser parser(&parse_handler); 542 parser.Parse(response_xml.c_str(), response_xml.length(), true); 543 if (!parse_handler.succeeded()) 544 return; 545 546 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED); 547 metric_logger.LogServerExperimentIdForQuery(experiment_id); 548 549 bool heuristics_detected_fillable_field = false; 550 bool query_response_overrode_heuristics = false; 551 552 // Copy the field types into the actual form. 553 std::vector<AutofillServerFieldInfo>::iterator current_info = 554 field_infos.begin(); 555 for (std::vector<FormStructure*>::const_iterator iter = forms.begin(); 556 iter != forms.end(); ++iter) { 557 FormStructure* form = *iter; 558 form->upload_required_ = upload_required; 559 form->server_experiment_id_ = experiment_id; 560 561 for (std::vector<AutofillField*>::iterator field = form->fields_.begin(); 562 field != form->fields_.end(); ++field) { 563 if (form->ShouldSkipField(**field)) 564 continue; 565 566 // In some cases *successful* response does not return all the fields. 567 // Quit the update of the types then. 568 if (current_info == field_infos.end()) 569 break; 570 571 // UNKNOWN_TYPE is reserved for use by the client. 572 DCHECK_NE(current_info->field_type, UNKNOWN_TYPE); 573 574 ServerFieldType heuristic_type = (*field)->heuristic_type(); 575 if (heuristic_type != UNKNOWN_TYPE) 576 heuristics_detected_fillable_field = true; 577 578 (*field)->set_server_type(current_info->field_type); 579 if (heuristic_type != (*field)->Type().GetStorableType()) 580 query_response_overrode_heuristics = true; 581 582 // Copy default value into the field if available. 583 if (!current_info->default_value.empty()) 584 (*field)->set_default_value(current_info->default_value); 585 586 ++current_info; 587 } 588 589 form->UpdateAutofillCount(); 590 form->IdentifySections(false); 591 } 592 593 AutofillMetrics::ServerQueryMetric metric; 594 if (query_response_overrode_heuristics) { 595 if (heuristics_detected_fillable_field) { 596 metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS; 597 } else { 598 metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS; 599 } 600 } else { 601 metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS; 602 } 603 metric_logger.LogServerQueryMetric(metric); 604} 605 606// static 607void FormStructure::GetFieldTypePredictions( 608 const std::vector<FormStructure*>& form_structures, 609 std::vector<FormDataPredictions>* forms) { 610 forms->clear(); 611 forms->reserve(form_structures.size()); 612 for (size_t i = 0; i < form_structures.size(); ++i) { 613 FormStructure* form_structure = form_structures[i]; 614 FormDataPredictions form; 615 form.data.name = form_structure->form_name_; 616 form.data.method = 617 ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET"); 618 form.data.origin = form_structure->source_url_; 619 form.data.action = form_structure->target_url_; 620 form.signature = form_structure->FormSignature(); 621 form.experiment_id = form_structure->server_experiment_id_; 622 623 for (std::vector<AutofillField*>::const_iterator field = 624 form_structure->fields_.begin(); 625 field != form_structure->fields_.end(); ++field) { 626 form.data.fields.push_back(FormFieldData(**field)); 627 628 FormFieldDataPredictions annotated_field; 629 annotated_field.signature = (*field)->FieldSignature(); 630 annotated_field.heuristic_type = 631 AutofillType((*field)->heuristic_type()).ToString(); 632 annotated_field.server_type = 633 AutofillType((*field)->server_type()).ToString(); 634 annotated_field.overall_type = (*field)->Type().ToString(); 635 form.fields.push_back(annotated_field); 636 } 637 638 forms->push_back(form); 639 } 640} 641 642std::string FormStructure::FormSignature() const { 643 std::string scheme(target_url_.scheme()); 644 std::string host(target_url_.host()); 645 646 // If target host or scheme is empty, set scheme and host of source url. 647 // This is done to match the Toolbar's behavior. 648 if (scheme.empty() || host.empty()) { 649 scheme = source_url_.scheme(); 650 host = source_url_.host(); 651 } 652 653 std::string form_string = scheme + "://" + host + "&" + 654 UTF16ToUTF8(form_name_) + 655 form_signature_field_names_; 656 657 return Hash64Bit(form_string); 658} 659 660bool FormStructure::ShouldSkipField(const FormFieldData& field) const { 661 return field.is_checkable; 662} 663 664bool FormStructure::IsAutofillable(bool require_method_post) const { 665 if (autofill_count() < kRequiredAutofillFields) 666 return false; 667 668 return ShouldBeParsed(require_method_post); 669} 670 671void FormStructure::UpdateAutofillCount() { 672 autofill_count_ = 0; 673 for (std::vector<AutofillField*>::const_iterator iter = begin(); 674 iter != end(); ++iter) { 675 AutofillField* field = *iter; 676 if (field && field->IsFieldFillable()) 677 ++autofill_count_; 678 } 679} 680 681bool FormStructure::ShouldBeParsed(bool require_method_post) const { 682 if (active_field_count() < kRequiredAutofillFields) 683 return false; 684 685 // Rule out http(s)://*/search?... 686 // e.g. http://www.google.com/search?q=... 687 // http://search.yahoo.com/search?p=... 688 if (target_url_.path() == "/search") 689 return false; 690 691 bool has_text_field = false; 692 for (std::vector<AutofillField*>::const_iterator it = begin(); 693 it != end() && !has_text_field; ++it) { 694 has_text_field |= (*it)->form_control_type != "select-one"; 695 } 696 if (!has_text_field) 697 return false; 698 699 return !require_method_post || (method_ == POST); 700} 701 702bool FormStructure::ShouldBeCrowdsourced() const { 703 return !has_author_specified_types_ && ShouldBeParsed(true); 704} 705 706void FormStructure::UpdateFromCache(const FormStructure& cached_form) { 707 // Map from field signatures to cached fields. 708 std::map<std::string, const AutofillField*> cached_fields; 709 for (size_t i = 0; i < cached_form.field_count(); ++i) { 710 const AutofillField* field = cached_form.field(i); 711 cached_fields[field->FieldSignature()] = field; 712 } 713 714 for (std::vector<AutofillField*>::const_iterator iter = begin(); 715 iter != end(); ++iter) { 716 AutofillField* field = *iter; 717 718 std::map<std::string, const AutofillField*>::const_iterator 719 cached_field = cached_fields.find(field->FieldSignature()); 720 if (cached_field != cached_fields.end()) { 721 if (field->form_control_type != "select-one" && 722 field->value == cached_field->second->value) { 723 // From the perspective of learning user data, text fields containing 724 // default values are equivalent to empty fields. 725 field->value = base::string16(); 726 } 727 728 field->set_heuristic_type(cached_field->second->heuristic_type()); 729 field->set_server_type(cached_field->second->server_type()); 730 } 731 } 732 733 UpdateAutofillCount(); 734 735 server_experiment_id_ = cached_form.server_experiment_id(); 736 737 // The form signature should match between query and upload requests to the 738 // server. On many websites, form elements are dynamically added, removed, or 739 // rearranged via JavaScript between page load and form submission, so we 740 // copy over the |form_signature_field_names_| corresponding to the query 741 // request. 742 DCHECK_EQ(cached_form.form_name_, form_name_); 743 DCHECK_EQ(cached_form.source_url_, source_url_); 744 DCHECK_EQ(cached_form.target_url_, target_url_); 745 form_signature_field_names_ = cached_form.form_signature_field_names_; 746} 747 748void FormStructure::LogQualityMetrics( 749 const AutofillMetrics& metric_logger, 750 const base::TimeTicks& load_time, 751 const base::TimeTicks& interaction_time, 752 const base::TimeTicks& submission_time) const { 753 std::string experiment_id = server_experiment_id(); 754 metric_logger.LogServerExperimentIdForUpload(experiment_id); 755 756 size_t num_detected_field_types = 0; 757 bool did_autofill_all_possible_fields = true; 758 bool did_autofill_some_possible_fields = false; 759 for (size_t i = 0; i < field_count(); ++i) { 760 const AutofillField* field = this->field(i); 761 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_SUBMITTED, 762 experiment_id); 763 764 // No further logging for empty fields nor for fields where the entered data 765 // does not appear to already exist in the user's stored Autofill data. 766 const ServerFieldTypeSet& field_types = field->possible_types(); 767 DCHECK(!field_types.empty()); 768 if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE)) 769 continue; 770 771 ++num_detected_field_types; 772 if (field->is_autofilled) 773 did_autofill_some_possible_fields = true; 774 else 775 did_autofill_all_possible_fields = false; 776 777 // Collapse field types that Chrome treats as identical, e.g. home and 778 // billing address fields. 779 ServerFieldTypeSet collapsed_field_types; 780 for (ServerFieldTypeSet::const_iterator it = field_types.begin(); 781 it != field_types.end(); 782 ++it) { 783 // Since we currently only support US phone numbers, the (city code + main 784 // digits) number is almost always identical to the whole phone number. 785 // TODO(isherman): Improve this logic once we add support for 786 // international numbers. 787 if (*it == PHONE_HOME_CITY_AND_NUMBER) 788 collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER); 789 else 790 collapsed_field_types.insert(AutofillType(*it).GetStorableType()); 791 } 792 793 // Capture the field's type, if it is unambiguous. 794 ServerFieldType field_type = UNKNOWN_TYPE; 795 if (collapsed_field_types.size() == 1) 796 field_type = *collapsed_field_types.begin(); 797 798 ServerFieldType heuristic_type = 799 AutofillType(field->heuristic_type()).GetStorableType(); 800 ServerFieldType server_type = 801 AutofillType(field->server_type()).GetStorableType(); 802 ServerFieldType predicted_type = field->Type().GetStorableType(); 803 804 // Log heuristic, server, and overall type quality metrics, independently of 805 // whether the field was autofilled. 806 if (heuristic_type == UNKNOWN_TYPE) { 807 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 808 field_type, experiment_id); 809 } else if (field_types.count(heuristic_type)) { 810 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH, 811 field_type, experiment_id); 812 } else { 813 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH, 814 field_type, experiment_id); 815 } 816 817 if (server_type == NO_SERVER_DATA) { 818 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 819 field_type, experiment_id); 820 } else if (field_types.count(server_type)) { 821 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH, 822 field_type, experiment_id); 823 } else { 824 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH, 825 field_type, experiment_id); 826 } 827 828 if (predicted_type == UNKNOWN_TYPE) { 829 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 830 field_type, experiment_id); 831 } else if (field_types.count(predicted_type)) { 832 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH, 833 field_type, experiment_id); 834 } else { 835 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH, 836 field_type, experiment_id); 837 } 838 839 // TODO(isherman): <select> fields don't support |is_autofilled()|, so we 840 // have to skip them for the remaining metrics. 841 if (field->form_control_type == "select-one") 842 continue; 843 844 if (field->is_autofilled) { 845 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_AUTOFILLED, 846 experiment_id); 847 } else { 848 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_NOT_AUTOFILLED, 849 experiment_id); 850 851 if (heuristic_type == UNKNOWN_TYPE) { 852 metric_logger.LogQualityMetric( 853 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN, 854 experiment_id); 855 } else if (field_types.count(heuristic_type)) { 856 metric_logger.LogQualityMetric( 857 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH, 858 experiment_id); 859 } else { 860 metric_logger.LogQualityMetric( 861 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH, 862 experiment_id); 863 } 864 865 if (server_type == NO_SERVER_DATA) { 866 metric_logger.LogQualityMetric( 867 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN, 868 experiment_id); 869 } else if (field_types.count(server_type)) { 870 metric_logger.LogQualityMetric( 871 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH, 872 experiment_id); 873 } else { 874 metric_logger.LogQualityMetric( 875 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH, 876 experiment_id); 877 } 878 } 879 } 880 881 if (num_detected_field_types < kRequiredAutofillFields) { 882 metric_logger.LogUserHappinessMetric( 883 AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM); 884 } else { 885 if (did_autofill_all_possible_fields) { 886 metric_logger.LogUserHappinessMetric( 887 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL); 888 } else if (did_autofill_some_possible_fields) { 889 metric_logger.LogUserHappinessMetric( 890 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME); 891 } else { 892 metric_logger.LogUserHappinessMetric( 893 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE); 894 } 895 896 // Unlike the other times, the |submission_time| should always be available. 897 DCHECK(!submission_time.is_null()); 898 899 // The |load_time| might be unset, in the case that the form was dynamically 900 // added to the DOM. 901 if (!load_time.is_null()) { 902 // Submission should always chronologically follow form load. 903 DCHECK(submission_time > load_time); 904 base::TimeDelta elapsed = submission_time - load_time; 905 if (did_autofill_some_possible_fields) 906 metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed); 907 else 908 metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed); 909 } 910 911 // The |interaction_time| might be unset, in the case that the user 912 // submitted a blank form. 913 if (!interaction_time.is_null()) { 914 // Submission should always chronologically follow interaction. 915 DCHECK(submission_time > interaction_time); 916 base::TimeDelta elapsed = submission_time - interaction_time; 917 if (did_autofill_some_possible_fields) { 918 metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed); 919 } else { 920 metric_logger.LogFormFillDurationFromInteractionWithoutAutofill( 921 elapsed); 922 } 923 } 924 } 925} 926 927const AutofillField* FormStructure::field(size_t index) const { 928 if (index >= fields_.size()) { 929 NOTREACHED(); 930 return NULL; 931 } 932 933 return fields_[index]; 934} 935 936AutofillField* FormStructure::field(size_t index) { 937 return const_cast<AutofillField*>( 938 static_cast<const FormStructure*>(this)->field(index)); 939} 940 941size_t FormStructure::field_count() const { 942 return fields_.size(); 943} 944 945size_t FormStructure::active_field_count() const { 946 return active_field_count_; 947} 948 949std::string FormStructure::server_experiment_id() const { 950 return server_experiment_id_; 951} 952 953FormData FormStructure::ToFormData() const { 954 // |data.user_submitted| will always be false. 955 FormData data; 956 data.name = form_name_; 957 data.origin = source_url_; 958 data.action = target_url_; 959 data.method = ASCIIToUTF16(method_ == POST ? "POST" : "GET"); 960 961 for (size_t i = 0; i < fields_.size(); ++i) { 962 data.fields.push_back(FormFieldData(*fields_[i])); 963 } 964 965 return data; 966} 967 968bool FormStructure::operator==(const FormData& form) const { 969 // TODO(jhawkins): Is this enough to differentiate a form? 970 if (form_name_ == form.name && 971 source_url_ == form.origin && 972 target_url_ == form.action) { 973 return true; 974 } 975 976 // TODO(jhawkins): Compare field names, IDs and labels once we have labels 977 // set up. 978 979 return false; 980} 981 982bool FormStructure::operator!=(const FormData& form) const { 983 return !operator==(form); 984} 985 986std::string FormStructure::Hash64Bit(const std::string& str) { 987 std::string hash_bin = base::SHA1HashString(str); 988 DCHECK_EQ(20U, hash_bin.length()); 989 990 uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) | 991 (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) | 992 (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) | 993 (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) | 994 (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) | 995 (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) | 996 (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) | 997 ((static_cast<uint64>(hash_bin[7])) & 0xFF); 998 999 return base::Uint64ToString(hash64); 1000} 1001 1002bool FormStructure::EncodeFormRequest( 1003 FormStructure::EncodeRequestType request_type, 1004 buzz::XmlElement* encompassing_xml_element) const { 1005 if (!field_count()) // Nothing to add. 1006 return false; 1007 1008 // Some badly formatted web sites repeat fields - limit number of fields to 1009 // 48, which is far larger than any valid form and XML still fits into 2K. 1010 // Do not send requests for forms with more than this many fields, as they are 1011 // near certainly not valid/auto-fillable. 1012 const size_t kMaxFieldsOnTheForm = 48; 1013 if (field_count() > kMaxFieldsOnTheForm) 1014 return false; 1015 1016 // Add the child nodes for the form fields. 1017 for (size_t index = 0; index < field_count(); ++index) { 1018 const AutofillField* field = fields_[index]; 1019 switch (request_type) { 1020 case FormStructure::UPLOAD: 1021 EncodeFieldForUpload(*field, encompassing_xml_element); 1022 break; 1023 case FormStructure::QUERY: 1024 if (ShouldSkipField(*field)) 1025 continue; 1026 EncodeFieldForQuery(*field, encompassing_xml_element); 1027 break; 1028 case FormStructure::FIELD_ASSIGNMENTS: 1029 EncodeFieldForFieldAssignments(*field, encompassing_xml_element); 1030 break; 1031 } 1032 } 1033 return true; 1034} 1035 1036void FormStructure::ParseFieldTypesFromAutocompleteAttributes( 1037 bool* found_types, 1038 bool* found_sections) { 1039 const std::string kDefaultSection = "-default"; 1040 1041 *found_types = false; 1042 *found_sections = false; 1043 for (std::vector<AutofillField*>::iterator it = fields_.begin(); 1044 it != fields_.end(); ++it) { 1045 AutofillField* field = *it; 1046 1047 // To prevent potential section name collisions, add a default suffix for 1048 // other fields. Without this, 'autocomplete' attribute values 1049 // "section--shipping street-address" and "shipping street-address" would be 1050 // parsed identically, given the section handling code below. We do this 1051 // before any validation so that fields with invalid attributes still end up 1052 // in the default section. These default section names will be overridden 1053 // by subsequent heuristic parsing steps if there are no author-specified 1054 // section names. 1055 field->set_section(kDefaultSection); 1056 1057 // Canonicalize the attribute value by trimming whitespace, collapsing 1058 // non-space characters (e.g. tab) to spaces, and converting to lowercase. 1059 std::string autocomplete_attribute = 1060 CollapseWhitespaceASCII(field->autocomplete_attribute, false); 1061 autocomplete_attribute = StringToLowerASCII(autocomplete_attribute); 1062 1063 // The autocomplete attribute is overloaded: it can specify either a field 1064 // type hint or whether autocomplete should be enabled at all. Ignore the 1065 // latter type of attribute value. 1066 if (autocomplete_attribute.empty() || 1067 autocomplete_attribute == "on" || 1068 autocomplete_attribute == "off") { 1069 continue; 1070 } 1071 1072 // Any other value, even it is invalid, is considered to be a type hint. 1073 // This allows a website's author to specify an attribute like 1074 // autocomplete="other" on a field to disable all Autofill heuristics for 1075 // the form. 1076 *found_types = true; 1077 1078 // Tokenize the attribute value. Per the spec, the tokens are parsed in 1079 // reverse order. 1080 std::vector<std::string> tokens; 1081 Tokenize(autocomplete_attribute, " ", &tokens); 1082 1083 // The final token must be the field type. 1084 // If it is not one of the known types, abort. 1085 DCHECK(!tokens.empty()); 1086 std::string field_type_token = tokens.back(); 1087 tokens.pop_back(); 1088 HtmlFieldType field_type = 1089 FieldTypeFromAutocompleteAttributeValue(field_type_token, *field); 1090 if (field_type == HTML_TYPE_UNKNOWN) 1091 continue; 1092 1093 // The preceding token, if any, may be a type hint. 1094 if (!tokens.empty() && IsContactTypeHint(tokens.back())) { 1095 // If it is, it must match the field type; otherwise, abort. 1096 // Note that an invalid token invalidates the entire attribute value, even 1097 // if the other tokens are valid. 1098 if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type)) 1099 continue; 1100 1101 // Chrome Autofill ignores these type hints. 1102 tokens.pop_back(); 1103 } 1104 1105 // The preceding token, if any, may be a fixed string that is either 1106 // "shipping" or "billing". Chrome Autofill treats these as implicit 1107 // section name suffixes. 1108 DCHECK_EQ(kDefaultSection, field->section()); 1109 std::string section = field->section(); 1110 HtmlFieldMode mode = HTML_MODE_NONE; 1111 if (!tokens.empty()) { 1112 if (tokens.back() == kShippingMode) 1113 mode = HTML_MODE_SHIPPING; 1114 else if (tokens.back() == kBillingMode) 1115 mode = HTML_MODE_BILLING; 1116 } 1117 1118 if (mode != HTML_MODE_NONE) { 1119 section = "-" + tokens.back(); 1120 tokens.pop_back(); 1121 } 1122 1123 // The preceding token, if any, may be a named section. 1124 const std::string kSectionPrefix = "section-"; 1125 if (!tokens.empty() && 1126 StartsWithASCII(tokens.back(), kSectionPrefix, true)) { 1127 // Prepend this section name to the suffix set in the preceding block. 1128 section = tokens.back().substr(kSectionPrefix.size()) + section; 1129 tokens.pop_back(); 1130 } 1131 1132 // No other tokens are allowed. If there are any remaining, abort. 1133 if (!tokens.empty()) 1134 continue; 1135 1136 if (section != kDefaultSection) { 1137 *found_sections = true; 1138 field->set_section(section); 1139 } 1140 1141 // No errors encountered while parsing! 1142 // Update the |field|'s type based on what was parsed from the attribute. 1143 field->SetHtmlType(field_type, mode); 1144 } 1145} 1146 1147void FormStructure::IdentifySections(bool has_author_specified_sections) { 1148 if (fields_.empty()) 1149 return; 1150 1151 if (!has_author_specified_sections) { 1152 // Name sections after the first field in the section. 1153 base::string16 current_section = fields_.front()->unique_name(); 1154 1155 // Keep track of the types we've seen in this section. 1156 std::set<ServerFieldType> seen_types; 1157 ServerFieldType previous_type = UNKNOWN_TYPE; 1158 1159 for (std::vector<AutofillField*>::iterator field = fields_.begin(); 1160 field != fields_.end(); ++field) { 1161 const ServerFieldType current_type = (*field)->Type().GetStorableType(); 1162 1163 bool already_saw_current_type = seen_types.count(current_type) > 0; 1164 1165 // Forms often ask for multiple phone numbers -- e.g. both a daytime and 1166 // evening phone number. Our phone number detection is also generally a 1167 // little off. Hence, ignore this field type as a signal here. 1168 if (AutofillType(current_type).group() == PHONE_HOME) 1169 already_saw_current_type = false; 1170 1171 // Some forms have adjacent fields of the same type. Two common examples: 1172 // * Forms with two email fields, where the second is meant to "confirm" 1173 // the first. 1174 // * Forms with a <select> menu for states in some countries, and a 1175 // freeform <input> field for states in other countries. (Usually, 1176 // only one of these two will be visible for any given choice of 1177 // country.) 1178 // Generally, adjacent fields of the same type belong in the same logical 1179 // section. 1180 if (current_type == previous_type) 1181 already_saw_current_type = false; 1182 1183 previous_type = current_type; 1184 1185 if (current_type != UNKNOWN_TYPE && already_saw_current_type) { 1186 // We reached the end of a section, so start a new section. 1187 seen_types.clear(); 1188 current_section = (*field)->unique_name(); 1189 } 1190 1191 seen_types.insert(current_type); 1192 (*field)->set_section(UTF16ToUTF8(current_section)); 1193 } 1194 } 1195 1196 // Ensure that credit card and address fields are in separate sections. 1197 // This simplifies the section-aware logic in autofill_manager.cc. 1198 for (std::vector<AutofillField*>::iterator field = fields_.begin(); 1199 field != fields_.end(); ++field) { 1200 FieldTypeGroup field_type_group = (*field)->Type().group(); 1201 if (field_type_group == CREDIT_CARD) 1202 (*field)->set_section((*field)->section() + "-cc"); 1203 else 1204 (*field)->set_section((*field)->section() + "-default"); 1205 } 1206} 1207 1208} // namespace autofill 1209