form_structure.cc revision eb525c5499e34cc9c4b825d6d9e75bb07cc06ace
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/autofill/core/browser/form_structure.h" 6 7#include <utility> 8 9#include "base/basictypes.h" 10#include "base/command_line.h" 11#include "base/logging.h" 12#include "base/memory/scoped_ptr.h" 13#include "base/sha1.h" 14#include "base/strings/string_number_conversions.h" 15#include "base/strings/string_util.h" 16#include "base/strings/stringprintf.h" 17#include "base/strings/utf_string_conversions.h" 18#include "base/time/time.h" 19#include "components/autofill/content/browser/autocheckout_page_meta_data.h" 20#include "components/autofill/core/browser/autofill_metrics.h" 21#include "components/autofill/core/browser/autofill_type.h" 22#include "components/autofill/core/browser/autofill_xml_parser.h" 23#include "components/autofill/core/browser/field_types.h" 24#include "components/autofill/core/browser/form_field.h" 25#include "components/autofill/core/common/autofill_constants.h" 26#include "components/autofill/core/common/form_data.h" 27#include "components/autofill/core/common/form_data_predictions.h" 28#include "components/autofill/core/common/form_field_data.h" 29#include "components/autofill/core/common/form_field_data_predictions.h" 30#include "third_party/libjingle/source/talk/xmllite/xmlelement.h" 31 32namespace autofill { 33namespace { 34 35const char kFormMethodPost[] = "post"; 36 37// XML elements and attributes. 38const char kAttributeAcceptedFeatures[] = "accepts"; 39const char kAttributeAutofillUsed[] = "autofillused"; 40const char kAttributeAutofillType[] = "autofilltype"; 41const char kAttributeClientVersion[] = "clientversion"; 42const char kAttributeDataPresent[] = "datapresent"; 43const char kAttributeFieldID[] = "fieldid"; 44const char kAttributeFieldType[] = "fieldtype"; 45const char kAttributeFormSignature[] = "formsignature"; 46const char kAttributeName[] = "name"; 47const char kAttributeSignature[] = "signature"; 48const char kAttributeUrlprefixSignature[] = "urlprefixsignature"; 49const char kAcceptedFeaturesExperiment[] = "e"; // e=experiments 50const char kAcceptedFeaturesAutocheckoutExperiment[] = "a,e"; // a=autocheckout 51const char kClientVersion[] = "6.1.1715.1442/en (GGLL)"; 52const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; 53const char kXMLElementAutofillQuery[] = "autofillquery"; 54const char kXMLElementAutofillUpload[] = "autofillupload"; 55const char kXMLElementFieldAssignments[] = "fieldassignments"; 56const char kXMLElementField[] = "field"; 57const char kXMLElementFields[] = "fields"; 58const char kXMLElementForm[] = "form"; 59const char kBillingSection[] = "billing"; 60const char kShippingSection[] = "shipping"; 61 62// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to 63// |available_field_types| and returns the hex representation as a string. 64std::string EncodeFieldTypes(const FieldTypeSet& available_field_types) { 65 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte, 66 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field. 67 const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8; 68 69 // Pack the types in |available_field_types| into |bit_field|. 70 std::vector<uint8> bit_field(kNumBytes, 0); 71 for (FieldTypeSet::const_iterator field_type = available_field_types.begin(); 72 field_type != available_field_types.end(); 73 ++field_type) { 74 // Set the appropriate bit in the field. The bit we set is the one 75 // |field_type| % 8 from the left of the byte. 76 const size_t byte = *field_type / 8; 77 const size_t bit = 0x80 >> (*field_type % 8); 78 DCHECK(byte < bit_field.size()); 79 bit_field[byte] |= bit; 80 } 81 82 // Discard any trailing zeroes. 83 // If there are no available types, we return the empty string. 84 size_t data_end = bit_field.size(); 85 for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) { 86 } 87 88 // Print all meaningfull bytes into a string. 89 std::string data_presence; 90 data_presence.reserve(data_end * 2 + 1); 91 for (size_t i = 0; i < data_end; ++i) { 92 base::StringAppendF(&data_presence, "%02x", bit_field[i]); 93 } 94 95 return data_presence; 96} 97 98// Helper for |EncodeFormRequest()| that creates XmlElements for the given field 99// in upload xml, and also add them to the parent XmlElement. 100void EncodeFieldForUpload(const AutofillField& field, 101 buzz::XmlElement* parent) { 102 // Don't upload checkable fields. 103 if (field.is_checkable) 104 return; 105 106 FieldTypeSet types = field.possible_types(); 107 // |types| could be empty in unit-tests only. 108 for (FieldTypeSet::iterator field_type = types.begin(); 109 field_type != types.end(); ++field_type) { 110 buzz::XmlElement *field_element = new buzz::XmlElement( 111 buzz::QName(kXMLElementField)); 112 113 field_element->SetAttr(buzz::QName(kAttributeSignature), 114 field.FieldSignature()); 115 field_element->SetAttr(buzz::QName(kAttributeAutofillType), 116 base::IntToString(*field_type)); 117 parent->AddElement(field_element); 118 } 119} 120 121// Helper for |EncodeFormRequest()| that creates XmlElement for the given field 122// in query xml, and also add it to the parent XmlElement. 123void EncodeFieldForQuery(const AutofillField& field, 124 buzz::XmlElement* parent) { 125 buzz::XmlElement *field_element = new buzz::XmlElement( 126 buzz::QName(kXMLElementField)); 127 field_element->SetAttr(buzz::QName(kAttributeSignature), 128 field.FieldSignature()); 129 parent->AddElement(field_element); 130} 131 132// Helper for |EncodeFormRequest()| that creates XmlElements for the given field 133// in field assignments xml, and also add them to the parent XmlElement. 134void EncodeFieldForFieldAssignments(const AutofillField& field, 135 buzz::XmlElement* parent) { 136 FieldTypeSet types = field.possible_types(); 137 for (FieldTypeSet::iterator field_type = types.begin(); 138 field_type != types.end(); ++field_type) { 139 buzz::XmlElement *field_element = new buzz::XmlElement( 140 buzz::QName(kXMLElementFields)); 141 142 field_element->SetAttr(buzz::QName(kAttributeFieldID), 143 field.FieldSignature()); 144 field_element->SetAttr(buzz::QName(kAttributeFieldType), 145 base::IntToString(*field_type)); 146 field_element->SetAttr(buzz::QName(kAttributeName), 147 UTF16ToUTF8(field.name)); 148 parent->AddElement(field_element); 149 } 150} 151 152// Returns |true| iff the |token| is a type hint for a contact field, as 153// specified in the implementation section of http://is.gd/whatwg_autocomplete 154// Note that "fax" and "pager" are intentionally ignored, as Chrome does not 155// support filling either type of information. 156bool IsContactTypeHint(const std::string& token) { 157 return token == "home" || token == "work" || token == "mobile"; 158} 159 160// Returns |true| iff the |token| is a type hint appropriate for a field of the 161// given |field_type|, as specified in the implementation section of 162// http://is.gd/whatwg_autocomplete 163bool ContactTypeHintMatchesFieldType(const std::string& token, 164 AutofillFieldType field_type) { 165 // The "home" and "work" type hints are only appropriate for email and phone 166 // number field types. 167 if (token == "home" || token == "work") { 168 return field_type == EMAIL_ADDRESS || 169 (field_type >= PHONE_HOME_NUMBER && 170 field_type <= PHONE_HOME_WHOLE_NUMBER); 171 } 172 173 // The "mobile" type hint is only appropriate for phone number field types. 174 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not 175 // support filling either type of information. 176 if (token == "mobile") { 177 return field_type >= PHONE_HOME_NUMBER && 178 field_type <= PHONE_HOME_WHOLE_NUMBER; 179 } 180 181 return false; 182} 183 184// Returns the Chrome Autofill-supported field type corresponding to the given 185// |autocomplete_type|, if there is one, in the context of the given |field|. 186// Chrome Autofill supports a subset of the field types listed at 187// http://is.gd/whatwg_autocomplete 188AutofillFieldType FieldTypeFromAutocompleteType( 189 const std::string& autocomplete_type, 190 const AutofillField& field) { 191 if (autocomplete_type == "name") 192 return NAME_FULL; 193 194 if (autocomplete_type == "given-name") 195 return NAME_FIRST; 196 197 if (autocomplete_type == "additional-name") { 198 if (field.max_length == 1) 199 return NAME_MIDDLE_INITIAL; 200 else 201 return NAME_MIDDLE; 202 } 203 204 if (autocomplete_type == "family-name") 205 return NAME_LAST; 206 207 if (autocomplete_type == "honorific-suffix") 208 return NAME_SUFFIX; 209 210 if (autocomplete_type == "organization") 211 return COMPANY_NAME; 212 213 if (autocomplete_type == "street-address" || 214 autocomplete_type == "address-line1") 215 return ADDRESS_HOME_LINE1; 216 217 if (autocomplete_type == "address-line2") 218 return ADDRESS_HOME_LINE2; 219 220 if (autocomplete_type == "locality") 221 return ADDRESS_HOME_CITY; 222 223 if (autocomplete_type == "region") 224 return ADDRESS_HOME_STATE; 225 226 if (autocomplete_type == "country") 227 return ADDRESS_HOME_COUNTRY; 228 229 if (autocomplete_type == "postal-code") 230 return ADDRESS_HOME_ZIP; 231 232 if (autocomplete_type == "cc-name") 233 return CREDIT_CARD_NAME; 234 235 if (autocomplete_type == "cc-number") 236 return CREDIT_CARD_NUMBER; 237 238 if (autocomplete_type == "cc-exp") { 239 if (field.max_length == 5) 240 return CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR; 241 else 242 return CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR; 243 } 244 245 if (autocomplete_type == "cc-exp-month") 246 return CREDIT_CARD_EXP_MONTH; 247 248 if (autocomplete_type == "cc-exp-year") { 249 if (field.max_length == 2) 250 return CREDIT_CARD_EXP_2_DIGIT_YEAR; 251 else 252 return CREDIT_CARD_EXP_4_DIGIT_YEAR; 253 } 254 255 if (autocomplete_type == "cc-csc") 256 return CREDIT_CARD_VERIFICATION_CODE; 257 258 if (autocomplete_type == "cc-type") 259 return CREDIT_CARD_TYPE; 260 261 if (autocomplete_type == "tel") 262 return PHONE_HOME_WHOLE_NUMBER; 263 264 if (autocomplete_type == "tel-country-code") 265 return PHONE_HOME_COUNTRY_CODE; 266 267 if (autocomplete_type == "tel-national") 268 return PHONE_HOME_CITY_AND_NUMBER; 269 270 if (autocomplete_type == "tel-area-code") 271 return PHONE_HOME_CITY_CODE; 272 273 if (autocomplete_type == "tel-local") 274 return PHONE_HOME_NUMBER; 275 276 if (autocomplete_type == "tel-local-prefix") 277 return PHONE_HOME_NUMBER; 278 279 if (autocomplete_type == "tel-local-suffix") 280 return PHONE_HOME_NUMBER; 281 282 if (autocomplete_type == "email") 283 return EMAIL_ADDRESS; 284 285 return UNKNOWN_TYPE; 286} 287 288} // namespace 289 290FormStructure::FormStructure(const FormData& form, 291 const std::string& autocheckout_url_prefix) 292 : form_name_(form.name), 293 source_url_(form.origin), 294 target_url_(form.action), 295 autofill_count_(0), 296 active_field_count_(0), 297 upload_required_(USE_UPLOAD_RATES), 298 server_experiment_id_("no server response"), 299 has_author_specified_types_(false), 300 autocheckout_url_prefix_(autocheckout_url_prefix), 301 filled_by_autocheckout_(false) { 302 // Copy the form fields. 303 std::map<base::string16, size_t> unique_names; 304 for (std::vector<FormFieldData>::const_iterator field = 305 form.fields.begin(); 306 field != form.fields.end(); field++) { 307 308 if (!ShouldSkipField(*field)) { 309 // Add all supported form fields (including with empty names) to the 310 // signature. This is a requirement for Autofill servers. 311 form_signature_field_names_.append("&"); 312 form_signature_field_names_.append(UTF16ToUTF8(field->name)); 313 314 ++active_field_count_; 315 } 316 317 // Generate a unique name for this field by appending a counter to the name. 318 // Make sure to prepend the counter with a non-numeric digit so that we are 319 // guaranteed to avoid collisions. 320 if (!unique_names.count(field->name)) 321 unique_names[field->name] = 1; 322 else 323 ++unique_names[field->name]; 324 base::string16 unique_name = field->name + ASCIIToUTF16("_") + 325 base::IntToString16(unique_names[field->name]); 326 fields_.push_back(new AutofillField(*field, unique_name)); 327 } 328 329 std::string method = UTF16ToUTF8(form.method); 330 if (StringToLowerASCII(method) == kFormMethodPost) { 331 method_ = POST; 332 } else { 333 // Either the method is 'get', or we don't know. In this case we default 334 // to GET. 335 method_ = GET; 336 } 337} 338 339FormStructure::~FormStructure() {} 340 341void FormStructure::DetermineHeuristicTypes( 342 const AutofillMetrics& metric_logger) { 343 // First, try to detect field types based on each field's |autocomplete| 344 // attribute value. If there is at least one form field that specifies an 345 // autocomplete type hint, don't try to apply other heuristics to match fields 346 // in this form. 347 bool has_author_specified_sections; 348 ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_, 349 &has_author_specified_sections); 350 351 if (!has_author_specified_types_) { 352 FieldTypeMap field_type_map; 353 FormField::ParseFormFields(fields_.get(), &field_type_map); 354 for (size_t index = 0; index < field_count(); index++) { 355 AutofillField* field = fields_[index]; 356 FieldTypeMap::iterator iter = field_type_map.find(field->unique_name()); 357 if (iter != field_type_map.end()) 358 field->set_heuristic_type(iter->second); 359 } 360 } 361 362 UpdateAutofillCount(); 363 IdentifySections(has_author_specified_sections); 364 365 if (IsAutofillable(true)) { 366 metric_logger.LogDeveloperEngagementMetric( 367 AutofillMetrics::FILLABLE_FORM_PARSED); 368 if (has_author_specified_types_) { 369 metric_logger.LogDeveloperEngagementMetric( 370 AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS); 371 } 372 } 373} 374 375bool FormStructure::EncodeUploadRequest( 376 const FieldTypeSet& available_field_types, 377 bool form_was_autofilled, 378 std::string* encoded_xml) const { 379 DCHECK(ShouldBeCrowdsourced()); 380 381 // Verify that |available_field_types| agrees with the possible field types we 382 // are uploading. 383 for (std::vector<AutofillField*>::const_iterator field = begin(); 384 field != end(); 385 ++field) { 386 for (FieldTypeSet::const_iterator type = (*field)->possible_types().begin(); 387 type != (*field)->possible_types().end(); 388 ++type) { 389 DCHECK(*type == UNKNOWN_TYPE || 390 *type == EMPTY_TYPE || 391 available_field_types.count(*type)); 392 } 393 } 394 395 // Set up the <autofillupload> element and its attributes. 396 buzz::XmlElement autofill_request_xml( 397 (buzz::QName(kXMLElementAutofillUpload))); 398 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion), 399 kClientVersion); 400 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature), 401 FormSignature()); 402 autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed), 403 form_was_autofilled ? "true" : "false"); 404 autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent), 405 EncodeFieldTypes(available_field_types).c_str()); 406 407 if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml)) 408 return false; // Malformed form, skip it. 409 410 // Obtain the XML structure as a string. 411 *encoded_xml = kXMLDeclaration; 412 *encoded_xml += autofill_request_xml.Str().c_str(); 413 414 // To enable this logging, run with the flag --vmodule="form_structure=2". 415 VLOG(2) << "\n" << *encoded_xml; 416 417 return true; 418} 419 420bool FormStructure::EncodeFieldAssignments( 421 const FieldTypeSet& available_field_types, 422 std::string* encoded_xml) const { 423 DCHECK(ShouldBeCrowdsourced()); 424 425 // Set up the <fieldassignments> element and its attributes. 426 buzz::XmlElement autofill_request_xml( 427 (buzz::QName(kXMLElementFieldAssignments))); 428 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature), 429 FormSignature()); 430 431 if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS, 432 &autofill_request_xml)) 433 return false; // Malformed form, skip it. 434 435 // Obtain the XML structure as a string. 436 *encoded_xml = kXMLDeclaration; 437 *encoded_xml += autofill_request_xml.Str().c_str(); 438 439 return true; 440} 441 442// static 443bool FormStructure::EncodeQueryRequest( 444 const std::vector<FormStructure*>& forms, 445 std::vector<std::string>* encoded_signatures, 446 std::string* encoded_xml) { 447 DCHECK(encoded_signatures); 448 DCHECK(encoded_xml); 449 encoded_xml->clear(); 450 encoded_signatures->clear(); 451 encoded_signatures->reserve(forms.size()); 452 453 // Set up the <autofillquery> element and attributes. 454 buzz::XmlElement autofill_request_xml( 455 (buzz::QName(kXMLElementAutofillQuery))); 456 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion), 457 kClientVersion); 458 459 // autocheckout_url_prefix tells the Autofill server where the forms in the 460 // request came from, and the the Autofill server checks internal status and 461 // decide to enable Autocheckout or not and may return Autocheckout related 462 // data in the response accordingly. 463 // There is no page/frame level object associated with FormStructure that 464 // we could extract URL prefix from. But, all the forms should come from the 465 // same frame, so they should have the same Autocheckout URL prefix. Thus we 466 // use URL prefix from the first form with Autocheckout enabled. 467 std::string autocheckout_url_prefix; 468 469 // Some badly formatted web sites repeat forms - detect that and encode only 470 // one form as returned data would be the same for all the repeated forms. 471 std::set<std::string> processed_forms; 472 for (ScopedVector<FormStructure>::const_iterator it = forms.begin(); 473 it != forms.end(); 474 ++it) { 475 std::string signature((*it)->FormSignature()); 476 if (processed_forms.find(signature) != processed_forms.end()) 477 continue; 478 processed_forms.insert(signature); 479 scoped_ptr<buzz::XmlElement> encompassing_xml_element( 480 new buzz::XmlElement(buzz::QName(kXMLElementForm))); 481 encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature), 482 signature); 483 484 if (!(*it)->EncodeFormRequest(FormStructure::QUERY, 485 encompassing_xml_element.get())) 486 continue; // Malformed form, skip it. 487 488 if ((*it)->IsAutocheckoutEnabled()) { 489 if (autocheckout_url_prefix.empty()) { 490 autocheckout_url_prefix = (*it)->autocheckout_url_prefix_; 491 } else { 492 // Making sure all the forms in the request has the same url_prefix. 493 DCHECK_EQ(autocheckout_url_prefix, (*it)->autocheckout_url_prefix_); 494 } 495 } 496 497 autofill_request_xml.AddElement(encompassing_xml_element.release()); 498 encoded_signatures->push_back(signature); 499 } 500 501 if (!encoded_signatures->size()) 502 return false; 503 504 if (autocheckout_url_prefix.empty()) { 505 autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures), 506 kAcceptedFeaturesExperiment); 507 } else { 508 autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures), 509 kAcceptedFeaturesAutocheckoutExperiment); 510 autofill_request_xml.SetAttr(buzz::QName(kAttributeUrlprefixSignature), 511 Hash64Bit(autocheckout_url_prefix)); 512 } 513 514 // Obtain the XML structure as a string. 515 *encoded_xml = kXMLDeclaration; 516 *encoded_xml += autofill_request_xml.Str().c_str(); 517 518 return true; 519} 520 521// static 522void FormStructure::ParseQueryResponse( 523 const std::string& response_xml, 524 const std::vector<FormStructure*>& forms, 525 autofill::AutocheckoutPageMetaData* page_meta_data, 526 const AutofillMetrics& metric_logger) { 527 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED); 528 529 // Parse the field types from the server response to the query. 530 std::vector<AutofillServerFieldInfo> field_infos; 531 UploadRequired upload_required; 532 std::string experiment_id; 533 AutofillQueryXmlParser parse_handler(&field_infos, 534 &upload_required, 535 &experiment_id, 536 page_meta_data); 537 buzz::XmlParser parser(&parse_handler); 538 parser.Parse(response_xml.c_str(), response_xml.length(), true); 539 if (!parse_handler.succeeded()) 540 return; 541 542 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED); 543 metric_logger.LogServerExperimentIdForQuery(experiment_id); 544 545 bool heuristics_detected_fillable_field = false; 546 bool query_response_overrode_heuristics = false; 547 548 // Copy the field types into the actual form. 549 std::vector<AutofillServerFieldInfo>::iterator current_info = 550 field_infos.begin(); 551 for (std::vector<FormStructure*>::const_iterator iter = forms.begin(); 552 iter != forms.end(); ++iter) { 553 FormStructure* form = *iter; 554 form->upload_required_ = upload_required; 555 form->server_experiment_id_ = experiment_id; 556 557 for (std::vector<AutofillField*>::iterator field = form->fields_.begin(); 558 field != form->fields_.end(); ++field) { 559 if (form->ShouldSkipField(**field)) 560 continue; 561 562 // In some cases *successful* response does not return all the fields. 563 // Quit the update of the types then. 564 if (current_info == field_infos.end()) 565 break; 566 567 // UNKNOWN_TYPE is reserved for use by the client. 568 DCHECK_NE(current_info->field_type, UNKNOWN_TYPE); 569 570 AutofillFieldType heuristic_type = (*field)->type(); 571 if (heuristic_type != UNKNOWN_TYPE) 572 heuristics_detected_fillable_field = true; 573 574 (*field)->set_server_type(current_info->field_type); 575 if (heuristic_type != (*field)->type()) 576 query_response_overrode_heuristics = true; 577 578 // Copy default value into the field if available. 579 if (!current_info->default_value.empty()) 580 (*field)->set_default_value(current_info->default_value); 581 582 ++current_info; 583 } 584 585 form->UpdateAutofillCount(); 586 form->IdentifySections(false); 587 } 588 589 AutofillMetrics::ServerQueryMetric metric; 590 if (query_response_overrode_heuristics) { 591 if (heuristics_detected_fillable_field) { 592 metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS; 593 } else { 594 metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS; 595 } 596 } else { 597 metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS; 598 } 599 metric_logger.LogServerQueryMetric(metric); 600} 601 602// static 603void FormStructure::GetFieldTypePredictions( 604 const std::vector<FormStructure*>& form_structures, 605 std::vector<FormDataPredictions>* forms) { 606 forms->clear(); 607 forms->reserve(form_structures.size()); 608 for (size_t i = 0; i < form_structures.size(); ++i) { 609 FormStructure* form_structure = form_structures[i]; 610 FormDataPredictions form; 611 form.data.name = form_structure->form_name_; 612 form.data.method = 613 ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET"); 614 form.data.origin = form_structure->source_url_; 615 form.data.action = form_structure->target_url_; 616 form.signature = form_structure->FormSignature(); 617 form.experiment_id = form_structure->server_experiment_id_; 618 619 for (std::vector<AutofillField*>::const_iterator field = 620 form_structure->fields_.begin(); 621 field != form_structure->fields_.end(); ++field) { 622 form.data.fields.push_back(FormFieldData(**field)); 623 624 FormFieldDataPredictions annotated_field; 625 annotated_field.signature = (*field)->FieldSignature(); 626 annotated_field.heuristic_type = 627 AutofillType::FieldTypeToString((*field)->heuristic_type()); 628 annotated_field.server_type = 629 AutofillType::FieldTypeToString((*field)->server_type()); 630 annotated_field.overall_type = 631 AutofillType::FieldTypeToString((*field)->type()); 632 form.fields.push_back(annotated_field); 633 } 634 635 forms->push_back(form); 636 } 637} 638 639std::string FormStructure::FormSignature() const { 640 std::string scheme(target_url_.scheme()); 641 std::string host(target_url_.host()); 642 643 // If target host or scheme is empty, set scheme and host of source url. 644 // This is done to match the Toolbar's behavior. 645 if (scheme.empty() || host.empty()) { 646 scheme = source_url_.scheme(); 647 host = source_url_.host(); 648 } 649 650 std::string form_string = scheme + "://" + host + "&" + 651 UTF16ToUTF8(form_name_) + 652 form_signature_field_names_; 653 654 return Hash64Bit(form_string); 655} 656 657bool FormStructure::IsAutocheckoutEnabled() const { 658 return !autocheckout_url_prefix_.empty(); 659} 660 661bool FormStructure::ShouldSkipField(const FormFieldData& field) const { 662 return (field.is_checkable || field.form_control_type == "password") && 663 !IsAutocheckoutEnabled(); 664} 665 666size_t FormStructure::RequiredFillableFields() const { 667 return IsAutocheckoutEnabled() ? 0 : kRequiredAutofillFields; 668} 669 670bool FormStructure::IsAutofillable(bool require_method_post) const { 671 if (autofill_count() < RequiredFillableFields()) 672 return false; 673 674 return ShouldBeParsed(require_method_post); 675} 676 677void FormStructure::UpdateAutofillCount() { 678 autofill_count_ = 0; 679 for (std::vector<AutofillField*>::const_iterator iter = begin(); 680 iter != end(); ++iter) { 681 AutofillField* field = *iter; 682 if (field && field->IsFieldFillable()) 683 ++autofill_count_; 684 } 685} 686 687bool FormStructure::ShouldBeParsed(bool require_method_post) const { 688 if (active_field_count() < RequiredFillableFields()) 689 return false; 690 691 // Rule out http(s)://*/search?... 692 // e.g. http://www.google.com/search?q=... 693 // http://search.yahoo.com/search?p=... 694 if (target_url_.path() == "/search") 695 return false; 696 697 if (!IsAutocheckoutEnabled()) { 698 // Make sure there is at least one text field when Autocheckout is 699 // not enabled. 700 bool has_text_field = false; 701 for (std::vector<AutofillField*>::const_iterator it = begin(); 702 it != end() && !has_text_field; ++it) { 703 has_text_field |= (*it)->form_control_type != "select-one"; 704 } 705 if (!has_text_field) 706 return false; 707 } 708 709 return !require_method_post || (method_ == POST); 710} 711 712bool FormStructure::ShouldBeCrowdsourced() const { 713 // Allow all forms in Autocheckout flow to be crowdsourced. 714 return (!has_author_specified_types_ && ShouldBeParsed(true)) || 715 IsAutocheckoutEnabled(); 716} 717 718void FormStructure::UpdateFromCache(const FormStructure& cached_form) { 719 // Map from field signatures to cached fields. 720 std::map<std::string, const AutofillField*> cached_fields; 721 for (size_t i = 0; i < cached_form.field_count(); ++i) { 722 const AutofillField* field = cached_form.field(i); 723 cached_fields[field->FieldSignature()] = field; 724 } 725 726 for (std::vector<AutofillField*>::const_iterator iter = begin(); 727 iter != end(); ++iter) { 728 AutofillField* field = *iter; 729 730 std::map<std::string, const AutofillField*>::const_iterator 731 cached_field = cached_fields.find(field->FieldSignature()); 732 if (cached_field != cached_fields.end()) { 733 if (field->form_control_type != "select-one" && 734 field->value == cached_field->second->value) { 735 // From the perspective of learning user data, text fields containing 736 // default values are equivalent to empty fields. 737 field->value = base::string16(); 738 } 739 740 field->set_heuristic_type(cached_field->second->heuristic_type()); 741 field->set_server_type(cached_field->second->server_type()); 742 } 743 } 744 745 UpdateAutofillCount(); 746 747 filled_by_autocheckout_ = cached_form.filled_by_autocheckout(); 748 server_experiment_id_ = cached_form.server_experiment_id(); 749 750 // The form signature should match between query and upload requests to the 751 // server. On many websites, form elements are dynamically added, removed, or 752 // rearranged via JavaScript between page load and form submission, so we 753 // copy over the |form_signature_field_names_| corresponding to the query 754 // request. 755 DCHECK_EQ(cached_form.form_name_, form_name_); 756 DCHECK_EQ(cached_form.source_url_, source_url_); 757 DCHECK_EQ(cached_form.target_url_, target_url_); 758 form_signature_field_names_ = cached_form.form_signature_field_names_; 759} 760 761void FormStructure::LogQualityMetrics( 762 const AutofillMetrics& metric_logger, 763 const base::TimeTicks& load_time, 764 const base::TimeTicks& interaction_time, 765 const base::TimeTicks& submission_time) const { 766 std::string experiment_id = server_experiment_id(); 767 metric_logger.LogServerExperimentIdForUpload(experiment_id); 768 769 size_t num_detected_field_types = 0; 770 bool did_autofill_all_possible_fields = true; 771 bool did_autofill_some_possible_fields = false; 772 for (size_t i = 0; i < field_count(); ++i) { 773 const AutofillField* field = this->field(i); 774 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_SUBMITTED, 775 experiment_id); 776 777 // No further logging for empty fields nor for fields where the entered data 778 // does not appear to already exist in the user's stored Autofill data. 779 const FieldTypeSet& field_types = field->possible_types(); 780 DCHECK(!field_types.empty()); 781 if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE)) 782 continue; 783 784 ++num_detected_field_types; 785 if (field->is_autofilled) 786 did_autofill_some_possible_fields = true; 787 else 788 did_autofill_all_possible_fields = false; 789 790 // Collapse field types that Chrome treats as identical, e.g. home and 791 // billing address fields. 792 FieldTypeSet collapsed_field_types; 793 for (FieldTypeSet::const_iterator it = field_types.begin(); 794 it != field_types.end(); 795 ++it) { 796 // Since we currently only support US phone numbers, the (city code + main 797 // digits) number is almost always identical to the whole phone number. 798 // TODO(isherman): Improve this logic once we add support for 799 // international numbers. 800 if (*it == PHONE_HOME_CITY_AND_NUMBER) 801 collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER); 802 else 803 collapsed_field_types.insert(AutofillType::GetEquivalentFieldType(*it)); 804 } 805 806 // Capture the field's type, if it is unambiguous. 807 AutofillFieldType field_type = UNKNOWN_TYPE; 808 if (collapsed_field_types.size() == 1) 809 field_type = *collapsed_field_types.begin(); 810 811 AutofillFieldType heuristic_type = field->heuristic_type(); 812 AutofillFieldType server_type = field->server_type(); 813 AutofillFieldType predicted_type = field->type(); 814 815 // Log heuristic, server, and overall type quality metrics, independently of 816 // whether the field was autofilled. 817 if (heuristic_type == UNKNOWN_TYPE) { 818 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 819 field_type, experiment_id); 820 } else if (field_types.count(heuristic_type)) { 821 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH, 822 field_type, experiment_id); 823 } else { 824 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH, 825 field_type, experiment_id); 826 } 827 828 if (server_type == NO_SERVER_DATA) { 829 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 830 field_type, experiment_id); 831 } else if (field_types.count(server_type)) { 832 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH, 833 field_type, experiment_id); 834 } else { 835 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH, 836 field_type, experiment_id); 837 } 838 839 if (predicted_type == UNKNOWN_TYPE) { 840 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 841 field_type, experiment_id); 842 } else if (field_types.count(predicted_type)) { 843 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH, 844 field_type, experiment_id); 845 } else { 846 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH, 847 field_type, experiment_id); 848 } 849 850 // TODO(isherman): <select> fields don't support |is_autofilled()|, so we 851 // have to skip them for the remaining metrics. 852 if (field->form_control_type == "select-one") 853 continue; 854 855 if (field->is_autofilled) { 856 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_AUTOFILLED, 857 experiment_id); 858 } else { 859 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_NOT_AUTOFILLED, 860 experiment_id); 861 862 if (heuristic_type == UNKNOWN_TYPE) { 863 metric_logger.LogQualityMetric( 864 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN, 865 experiment_id); 866 } else if (field_types.count(heuristic_type)) { 867 metric_logger.LogQualityMetric( 868 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH, 869 experiment_id); 870 } else { 871 metric_logger.LogQualityMetric( 872 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH, 873 experiment_id); 874 } 875 876 if (server_type == NO_SERVER_DATA) { 877 metric_logger.LogQualityMetric( 878 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN, 879 experiment_id); 880 } else if (field_types.count(server_type)) { 881 metric_logger.LogQualityMetric( 882 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH, 883 experiment_id); 884 } else { 885 metric_logger.LogQualityMetric( 886 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH, 887 experiment_id); 888 } 889 } 890 } 891 892 if (num_detected_field_types < RequiredFillableFields()) { 893 metric_logger.LogUserHappinessMetric( 894 AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM); 895 } else { 896 if (did_autofill_all_possible_fields) { 897 metric_logger.LogUserHappinessMetric( 898 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL); 899 } else if (did_autofill_some_possible_fields) { 900 metric_logger.LogUserHappinessMetric( 901 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME); 902 } else { 903 metric_logger.LogUserHappinessMetric( 904 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE); 905 } 906 907 // Unlike the other times, the |submission_time| should always be available. 908 DCHECK(!submission_time.is_null()); 909 910 // The |load_time| might be unset, in the case that the form was dynamically 911 // added to the DOM. 912 if (!load_time.is_null()) { 913 // Submission should always chronologically follow form load. 914 DCHECK(submission_time > load_time); 915 base::TimeDelta elapsed = submission_time - load_time; 916 if (did_autofill_some_possible_fields) 917 metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed); 918 else 919 metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed); 920 } 921 922 // The |interaction_time| might be unset, in the case that the user 923 // submitted a blank form. 924 if (!interaction_time.is_null()) { 925 // Submission should always chronologically follow interaction. 926 DCHECK(submission_time > interaction_time); 927 base::TimeDelta elapsed = submission_time - interaction_time; 928 if (did_autofill_some_possible_fields) { 929 metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed); 930 } else { 931 metric_logger.LogFormFillDurationFromInteractionWithoutAutofill( 932 elapsed); 933 } 934 } 935 } 936} 937 938const AutofillField* FormStructure::field(size_t index) const { 939 if (index >= fields_.size()) { 940 NOTREACHED(); 941 return NULL; 942 } 943 944 return fields_[index]; 945} 946 947AutofillField* FormStructure::field(size_t index) { 948 return const_cast<AutofillField*>( 949 static_cast<const FormStructure*>(this)->field(index)); 950} 951 952size_t FormStructure::field_count() const { 953 return fields_.size(); 954} 955 956size_t FormStructure::active_field_count() const { 957 return active_field_count_; 958} 959 960std::string FormStructure::server_experiment_id() const { 961 return server_experiment_id_; 962} 963 964FormData FormStructure::ToFormData() const { 965 // |data.user_submitted| will always be false. 966 FormData data; 967 data.name = form_name_; 968 data.origin = source_url_; 969 data.action = target_url_; 970 data.method = ASCIIToUTF16(method_ == POST ? "POST" : "GET"); 971 972 for (size_t i = 0; i < fields_.size(); ++i) { 973 data.fields.push_back(FormFieldData(*fields_[i])); 974 } 975 976 return data; 977} 978 979bool FormStructure::operator==(const FormData& form) const { 980 // TODO(jhawkins): Is this enough to differentiate a form? 981 if (form_name_ == form.name && 982 source_url_ == form.origin && 983 target_url_ == form.action) { 984 return true; 985 } 986 987 // TODO(jhawkins): Compare field names, IDs and labels once we have labels 988 // set up. 989 990 return false; 991} 992 993bool FormStructure::operator!=(const FormData& form) const { 994 return !operator==(form); 995} 996 997std::string FormStructure::Hash64Bit(const std::string& str) { 998 std::string hash_bin = base::SHA1HashString(str); 999 DCHECK_EQ(20U, hash_bin.length()); 1000 1001 uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) | 1002 (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) | 1003 (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) | 1004 (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) | 1005 (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) | 1006 (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) | 1007 (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) | 1008 ((static_cast<uint64>(hash_bin[7])) & 0xFF); 1009 1010 return base::Uint64ToString(hash64); 1011} 1012 1013bool FormStructure::EncodeFormRequest( 1014 FormStructure::EncodeRequestType request_type, 1015 buzz::XmlElement* encompassing_xml_element) const { 1016 if (!field_count()) // Nothing to add. 1017 return false; 1018 1019 // Some badly formatted web sites repeat fields - limit number of fields to 1020 // 48, which is far larger than any valid form and XML still fits into 2K. 1021 // Do not send requests for forms with more than this many fields, as they are 1022 // near certainly not valid/auto-fillable. 1023 const size_t kMaxFieldsOnTheForm = 48; 1024 if (field_count() > kMaxFieldsOnTheForm) 1025 return false; 1026 1027 // Add the child nodes for the form fields. 1028 for (size_t index = 0; index < field_count(); ++index) { 1029 const AutofillField* field = fields_[index]; 1030 switch (request_type) { 1031 case FormStructure::UPLOAD: 1032 EncodeFieldForUpload(*field, encompassing_xml_element); 1033 break; 1034 case FormStructure::QUERY: 1035 if (ShouldSkipField(*field)) 1036 continue; 1037 EncodeFieldForQuery(*field, encompassing_xml_element); 1038 break; 1039 case FormStructure::FIELD_ASSIGNMENTS: 1040 EncodeFieldForFieldAssignments(*field, encompassing_xml_element); 1041 break; 1042 } 1043 } 1044 return true; 1045} 1046 1047void FormStructure::ParseFieldTypesFromAutocompleteAttributes( 1048 bool* found_types, 1049 bool* found_sections) { 1050 const std::string kDefaultSection = "-default"; 1051 1052 *found_types = false; 1053 *found_sections = false; 1054 for (std::vector<AutofillField*>::iterator it = fields_.begin(); 1055 it != fields_.end(); ++it) { 1056 AutofillField* field = *it; 1057 1058 // To prevent potential section name collisions, add a default suffix for 1059 // other fields. Without this, 'autocomplete' attribute values 1060 // "section--shipping street-address" and "shipping street-address" would be 1061 // parsed identically, given the section handling code below. We do this 1062 // before any validation so that fields with invalid attributes still end up 1063 // in the default section. These default section names will be overridden 1064 // by subsequent heuristic parsing steps if there are no author-specified 1065 // section names. 1066 field->set_section(kDefaultSection); 1067 1068 // Canonicalize the attribute value by trimming whitespace, collapsing 1069 // non-space characters (e.g. tab) to spaces, and converting to lowercase. 1070 std::string autocomplete_attribute = 1071 CollapseWhitespaceASCII(field->autocomplete_attribute, false); 1072 autocomplete_attribute = StringToLowerASCII(autocomplete_attribute); 1073 1074 // The autocomplete attribute is overloaded: it can specify either a field 1075 // type hint or whether autocomplete should be enabled at all. Ignore the 1076 // latter type of attribute value. 1077 if (autocomplete_attribute.empty() || 1078 autocomplete_attribute == "on" || 1079 autocomplete_attribute == "off") { 1080 continue; 1081 } 1082 1083 // Any other value, even it is invalid, is considered to be a type hint. 1084 // This allows a website's author to specify an attribute like 1085 // autocomplete="other" on a field to disable all Autofill heuristics for 1086 // the form. 1087 *found_types = true; 1088 1089 // Tokenize the attribute value. Per the spec, the tokens are parsed in 1090 // reverse order. 1091 std::vector<std::string> tokens; 1092 Tokenize(autocomplete_attribute, " ", &tokens); 1093 1094 // The final token must be the field type. 1095 // If it is not one of the known types, abort. 1096 DCHECK(!tokens.empty()); 1097 std::string field_type_token = tokens.back(); 1098 tokens.pop_back(); 1099 AutofillFieldType field_type = 1100 FieldTypeFromAutocompleteType(field_type_token, *field); 1101 if (field_type == UNKNOWN_TYPE) 1102 continue; 1103 1104 // The preceding token, if any, may be a type hint. 1105 if (!tokens.empty() && IsContactTypeHint(tokens.back())) { 1106 // If it is, it must match the field type; otherwise, abort. 1107 // Note that an invalid token invalidates the entire attribute value, even 1108 // if the other tokens are valid. 1109 if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type)) 1110 continue; 1111 1112 // Chrome Autofill ignores these type hints. 1113 tokens.pop_back(); 1114 } 1115 1116 // The preceding token, if any, may be a fixed string that is either 1117 // "shipping" or "billing". Chrome Autofill treats these as implicit 1118 // section name suffixes. 1119 DCHECK_EQ(kDefaultSection, field->section()); 1120 std::string section = field->section(); 1121 if (!tokens.empty() && 1122 (tokens.back() == kShippingSection || 1123 tokens.back() == kBillingSection)) { 1124 // Set Autofill field type to billing if section is billing. 1125 if (tokens.back() == kBillingSection) 1126 field_type = AutofillType::GetEquivalentBillingFieldType(field_type); 1127 section = "-" + tokens.back(); 1128 tokens.pop_back(); 1129 } 1130 1131 // The preceding token, if any, may be a named section. 1132 const std::string kSectionPrefix = "section-"; 1133 if (!tokens.empty() && 1134 StartsWithASCII(tokens.back(), kSectionPrefix, true)) { 1135 // Prepend this section name to the suffix set in the preceding block. 1136 section = tokens.back().substr(kSectionPrefix.size()) + section; 1137 tokens.pop_back(); 1138 } 1139 1140 // No other tokens are allowed. If there are any remaining, abort. 1141 if (!tokens.empty()) 1142 continue; 1143 1144 if (section != kDefaultSection) { 1145 *found_sections = true; 1146 field->set_section(section); 1147 } 1148 1149 // No errors encountered while parsing! 1150 // Update the |field|'s type based on what was parsed from the attribute. 1151 field->set_heuristic_type(field_type); 1152 if (field_type_token == "tel-local-prefix") 1153 field->set_phone_part(AutofillField::PHONE_PREFIX); 1154 else if (field_type_token == "tel-local-suffix") 1155 field->set_phone_part(AutofillField::PHONE_SUFFIX); 1156 } 1157} 1158 1159void FormStructure::IdentifySections(bool has_author_specified_sections) { 1160 if (fields_.empty()) 1161 return; 1162 1163 if (!has_author_specified_sections) { 1164 // Name sections after the first field in the section. 1165 base::string16 current_section = fields_.front()->unique_name(); 1166 1167 // Keep track of the types we've seen in this section. 1168 std::set<AutofillFieldType> seen_types; 1169 AutofillFieldType previous_type = UNKNOWN_TYPE; 1170 1171 for (std::vector<AutofillField*>::iterator field = fields_.begin(); 1172 field != fields_.end(); ++field) { 1173 const AutofillFieldType current_type = 1174 AutofillType::GetEquivalentFieldType((*field)->type()); 1175 1176 bool already_saw_current_type = seen_types.count(current_type) > 0; 1177 1178 // Forms often ask for multiple phone numbers -- e.g. both a daytime and 1179 // evening phone number. Our phone number detection is also generally a 1180 // little off. Hence, ignore this field type as a signal here. 1181 if (AutofillType(current_type).group() == AutofillType::PHONE_HOME) 1182 already_saw_current_type = false; 1183 1184 // Some forms have adjacent fields of the same type. Two common examples: 1185 // * Forms with two email fields, where the second is meant to "confirm" 1186 // the first. 1187 // * Forms with a <select> menu for states in some countries, and a 1188 // freeform <input> field for states in other countries. (Usually, 1189 // only one of these two will be visible for any given choice of 1190 // country.) 1191 // Generally, adjacent fields of the same type belong in the same logical 1192 // section. 1193 if (current_type == previous_type) 1194 already_saw_current_type = false; 1195 1196 previous_type = current_type; 1197 1198 if (current_type != UNKNOWN_TYPE && already_saw_current_type) { 1199 // We reached the end of a section, so start a new section. 1200 seen_types.clear(); 1201 current_section = (*field)->unique_name(); 1202 } 1203 1204 seen_types.insert(current_type); 1205 (*field)->set_section(UTF16ToUTF8(current_section)); 1206 } 1207 } 1208 1209 // Ensure that credit card and address fields are in separate sections. 1210 // This simplifies the section-aware logic in autofill_manager.cc. 1211 for (std::vector<AutofillField*>::iterator field = fields_.begin(); 1212 field != fields_.end(); ++field) { 1213 AutofillType::FieldTypeGroup field_type_group = 1214 AutofillType((*field)->type()).group(); 1215 if (field_type_group == AutofillType::CREDIT_CARD) 1216 (*field)->set_section((*field)->section() + "-cc"); 1217 else 1218 (*field)->set_section((*field)->section() + "-default"); 1219 } 1220} 1221 1222} // namespace autofill 1223