form_structure.cc revision cedac228d2dd51db4b79ea1e72c7f249408ee061
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/autofill/core/browser/form_structure.h" 6 7#include <utility> 8 9#include "base/basictypes.h" 10#include "base/command_line.h" 11#include "base/i18n/case_conversion.h" 12#include "base/logging.h" 13#include "base/memory/scoped_ptr.h" 14#include "base/sha1.h" 15#include "base/strings/string_number_conversions.h" 16#include "base/strings/string_util.h" 17#include "base/strings/stringprintf.h" 18#include "base/strings/utf_string_conversions.h" 19#include "base/time/time.h" 20#include "components/autofill/core/browser/autofill_metrics.h" 21#include "components/autofill/core/browser/autofill_type.h" 22#include "components/autofill/core/browser/autofill_xml_parser.h" 23#include "components/autofill/core/browser/field_types.h" 24#include "components/autofill/core/browser/form_field.h" 25#include "components/autofill/core/common/autofill_constants.h" 26#include "components/autofill/core/common/form_data.h" 27#include "components/autofill/core/common/form_data_predictions.h" 28#include "components/autofill/core/common/form_field_data.h" 29#include "components/autofill/core/common/form_field_data_predictions.h" 30#include "third_party/icu/source/i18n/unicode/regex.h" 31#include "third_party/libjingle/source/talk/xmllite/xmlelement.h" 32 33namespace autofill { 34namespace { 35 36const char kFormMethodPost[] = "post"; 37 38// XML elements and attributes. 39const char kAttributeAutofillUsed[] = "autofillused"; 40const char kAttributeAutofillType[] = "autofilltype"; 41const char kAttributeClientVersion[] = "clientversion"; 42const char kAttributeDataPresent[] = "datapresent"; 43const char kAttributeFieldID[] = "fieldid"; 44const char kAttributeFieldType[] = "fieldtype"; 45const char kAttributeFormSignature[] = "formsignature"; 46const char kAttributeName[] = "name"; 47const char kAttributeSignature[] = "signature"; 48const char kClientVersion[] = "6.1.1715.1442/en (GGLL)"; 49const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; 50const char kXMLElementAutofillQuery[] = "autofillquery"; 51const char kXMLElementAutofillUpload[] = "autofillupload"; 52const char kXMLElementFieldAssignments[] = "fieldassignments"; 53const char kXMLElementField[] = "field"; 54const char kXMLElementFields[] = "fields"; 55const char kXMLElementForm[] = "form"; 56const char kBillingMode[] = "billing"; 57const char kShippingMode[] = "shipping"; 58 59// Stip away >= 5 consecutive digits. 60const char kIgnorePatternInFieldName[] = "\\d{5,}+"; 61 62// Helper for |EncodeUploadRequest()| that creates a bit field corresponding to 63// |available_field_types| and returns the hex representation as a string. 64std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) { 65 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte, 66 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field. 67 const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8; 68 69 // Pack the types in |available_field_types| into |bit_field|. 70 std::vector<uint8> bit_field(kNumBytes, 0); 71 for (ServerFieldTypeSet::const_iterator field_type = 72 available_field_types.begin(); 73 field_type != available_field_types.end(); 74 ++field_type) { 75 // Set the appropriate bit in the field. The bit we set is the one 76 // |field_type| % 8 from the left of the byte. 77 const size_t byte = *field_type / 8; 78 const size_t bit = 0x80 >> (*field_type % 8); 79 DCHECK(byte < bit_field.size()); 80 bit_field[byte] |= bit; 81 } 82 83 // Discard any trailing zeroes. 84 // If there are no available types, we return the empty string. 85 size_t data_end = bit_field.size(); 86 for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) { 87 } 88 89 // Print all meaningfull bytes into a string. 90 std::string data_presence; 91 data_presence.reserve(data_end * 2 + 1); 92 for (size_t i = 0; i < data_end; ++i) { 93 base::StringAppendF(&data_presence, "%02x", bit_field[i]); 94 } 95 96 return data_presence; 97} 98 99// Helper for |EncodeFormRequest()| that creates XmlElements for the given field 100// in upload xml, and also add them to the parent XmlElement. 101void EncodeFieldForUpload(const AutofillField& field, 102 buzz::XmlElement* parent) { 103 // Don't upload checkable fields. 104 if (field.is_checkable) 105 return; 106 107 ServerFieldTypeSet types = field.possible_types(); 108 // |types| could be empty in unit-tests only. 109 for (ServerFieldTypeSet::iterator field_type = types.begin(); 110 field_type != types.end(); ++field_type) { 111 buzz::XmlElement *field_element = new buzz::XmlElement( 112 buzz::QName(kXMLElementField)); 113 114 field_element->SetAttr(buzz::QName(kAttributeSignature), 115 field.FieldSignature()); 116 field_element->SetAttr(buzz::QName(kAttributeAutofillType), 117 base::IntToString(*field_type)); 118 parent->AddElement(field_element); 119 } 120} 121 122// Helper for |EncodeFormRequest()| that creates XmlElement for the given field 123// in query xml, and also add it to the parent XmlElement. 124void EncodeFieldForQuery(const AutofillField& field, 125 buzz::XmlElement* parent) { 126 buzz::XmlElement *field_element = new buzz::XmlElement( 127 buzz::QName(kXMLElementField)); 128 field_element->SetAttr(buzz::QName(kAttributeSignature), 129 field.FieldSignature()); 130 parent->AddElement(field_element); 131} 132 133// Helper for |EncodeFormRequest()| that creates XmlElements for the given field 134// in field assignments xml, and also add them to the parent XmlElement. 135void EncodeFieldForFieldAssignments(const AutofillField& field, 136 buzz::XmlElement* parent) { 137 ServerFieldTypeSet types = field.possible_types(); 138 for (ServerFieldTypeSet::iterator field_type = types.begin(); 139 field_type != types.end(); ++field_type) { 140 buzz::XmlElement *field_element = new buzz::XmlElement( 141 buzz::QName(kXMLElementFields)); 142 143 field_element->SetAttr(buzz::QName(kAttributeFieldID), 144 field.FieldSignature()); 145 field_element->SetAttr(buzz::QName(kAttributeFieldType), 146 base::IntToString(*field_type)); 147 field_element->SetAttr(buzz::QName(kAttributeName), 148 base::UTF16ToUTF8(field.name)); 149 parent->AddElement(field_element); 150 } 151} 152 153// Returns |true| iff the |token| is a type hint for a contact field, as 154// specified in the implementation section of http://is.gd/whatwg_autocomplete 155// Note that "fax" and "pager" are intentionally ignored, as Chrome does not 156// support filling either type of information. 157bool IsContactTypeHint(const std::string& token) { 158 return token == "home" || token == "work" || token == "mobile"; 159} 160 161// Returns |true| iff the |token| is a type hint appropriate for a field of the 162// given |field_type|, as specified in the implementation section of 163// http://is.gd/whatwg_autocomplete 164bool ContactTypeHintMatchesFieldType(const std::string& token, 165 HtmlFieldType field_type) { 166 // The "home" and "work" type hints are only appropriate for email and phone 167 // number field types. 168 if (token == "home" || token == "work") { 169 return field_type == HTML_TYPE_EMAIL || 170 (field_type >= HTML_TYPE_TEL && 171 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX); 172 } 173 174 // The "mobile" type hint is only appropriate for phone number field types. 175 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not 176 // support filling either type of information. 177 if (token == "mobile") { 178 return field_type >= HTML_TYPE_TEL && 179 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX; 180 } 181 182 return false; 183} 184 185// Returns the Chrome Autofill-supported field type corresponding to the given 186// |autocomplete_attribute_value|, if there is one, in the context of the given 187// |field|. Chrome Autofill supports a subset of the field types listed at 188// http://is.gd/whatwg_autocomplete 189HtmlFieldType FieldTypeFromAutocompleteAttributeValue( 190 const std::string& autocomplete_attribute_value, 191 const AutofillField& field) { 192 if (autocomplete_attribute_value == "name") 193 return HTML_TYPE_NAME; 194 195 if (autocomplete_attribute_value == "given-name") 196 return HTML_TYPE_GIVEN_NAME; 197 198 if (autocomplete_attribute_value == "additional-name") { 199 if (field.max_length == 1) 200 return HTML_TYPE_ADDITIONAL_NAME_INITIAL; 201 else 202 return HTML_TYPE_ADDITIONAL_NAME; 203 } 204 205 if (autocomplete_attribute_value == "family-name") 206 return HTML_TYPE_FAMILY_NAME; 207 208 if (autocomplete_attribute_value == "organization") 209 return HTML_TYPE_ORGANIZATION; 210 211 if (autocomplete_attribute_value == "street-address") 212 return HTML_TYPE_STREET_ADDRESS; 213 214 if (autocomplete_attribute_value == "address-line1") 215 return HTML_TYPE_ADDRESS_LINE1; 216 217 if (autocomplete_attribute_value == "address-line2") 218 return HTML_TYPE_ADDRESS_LINE2; 219 220 if (autocomplete_attribute_value == "locality") 221 return HTML_TYPE_LOCALITY; 222 223 if (autocomplete_attribute_value == "region") 224 return HTML_TYPE_REGION; 225 226 if (autocomplete_attribute_value == "country") 227 return HTML_TYPE_COUNTRY_CODE; 228 229 if (autocomplete_attribute_value == "country-name") 230 return HTML_TYPE_COUNTRY_NAME; 231 232 if (autocomplete_attribute_value == "postal-code") 233 return HTML_TYPE_POSTAL_CODE; 234 235 // content_switches.h isn't accessible from here, hence we have 236 // to copy the string literal. This should be removed soon anyway. 237 if (autocomplete_attribute_value == "address" && 238 CommandLine::ForCurrentProcess()->HasSwitch( 239 "enable-experimental-web-platform-features")) { 240 return HTML_TYPE_FULL_ADDRESS; 241 } 242 243 if (autocomplete_attribute_value == "cc-name") 244 return HTML_TYPE_CREDIT_CARD_NAME; 245 246 if (autocomplete_attribute_value == "cc-number") 247 return HTML_TYPE_CREDIT_CARD_NUMBER; 248 249 if (autocomplete_attribute_value == "cc-exp") { 250 if (field.max_length == 5) 251 return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR; 252 else if (field.max_length == 7) 253 return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR; 254 else 255 return HTML_TYPE_CREDIT_CARD_EXP; 256 } 257 258 if (autocomplete_attribute_value == "cc-exp-month") 259 return HTML_TYPE_CREDIT_CARD_EXP_MONTH; 260 261 if (autocomplete_attribute_value == "cc-exp-year") { 262 if (field.max_length == 2) 263 return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR; 264 else if (field.max_length == 4) 265 return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR; 266 else 267 return HTML_TYPE_CREDIT_CARD_EXP_YEAR; 268 } 269 270 if (autocomplete_attribute_value == "cc-csc") 271 return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE; 272 273 if (autocomplete_attribute_value == "cc-type") 274 return HTML_TYPE_CREDIT_CARD_TYPE; 275 276 if (autocomplete_attribute_value == "tel") 277 return HTML_TYPE_TEL; 278 279 if (autocomplete_attribute_value == "tel-country-code") 280 return HTML_TYPE_TEL_COUNTRY_CODE; 281 282 if (autocomplete_attribute_value == "tel-national") 283 return HTML_TYPE_TEL_NATIONAL; 284 285 if (autocomplete_attribute_value == "tel-area-code") 286 return HTML_TYPE_TEL_AREA_CODE; 287 288 if (autocomplete_attribute_value == "tel-local") 289 return HTML_TYPE_TEL_LOCAL; 290 291 if (autocomplete_attribute_value == "tel-local-prefix") 292 return HTML_TYPE_TEL_LOCAL_PREFIX; 293 294 if (autocomplete_attribute_value == "tel-local-suffix") 295 return HTML_TYPE_TEL_LOCAL_SUFFIX; 296 297 if (autocomplete_attribute_value == "email") 298 return HTML_TYPE_EMAIL; 299 300 return HTML_TYPE_UNKNOWN; 301} 302 303std::string StripDigitsIfRequired(const base::string16& input) { 304 UErrorCode status = U_ZERO_ERROR; 305 CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern, 306 (kIgnorePatternInFieldName)); 307 CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher, 308 (icu_pattern, UREGEX_CASE_INSENSITIVE, status)); 309 DCHECK_EQ(status, U_ZERO_ERROR); 310 311 icu::UnicodeString icu_input(input.data(), input.length()); 312 matcher.reset(icu_input); 313 314 icu::UnicodeString replaced_string = matcher.replaceAll("", status); 315 316 std::string return_string; 317 status = U_ZERO_ERROR; 318 base::UTF16ToUTF8(replaced_string.getBuffer(), 319 static_cast<size_t>(replaced_string.length()), 320 &return_string); 321 if (status != U_ZERO_ERROR) { 322 DVLOG(1) << "Couldn't strip digits in " << base::UTF16ToUTF8(input); 323 return base::UTF16ToUTF8(input); 324 } 325 326 return return_string; 327} 328 329} // namespace 330 331FormStructure::FormStructure(const FormData& form) 332 : form_name_(form.name), 333 source_url_(form.origin), 334 target_url_(form.action), 335 autofill_count_(0), 336 active_field_count_(0), 337 upload_required_(USE_UPLOAD_RATES), 338 has_author_specified_types_(false) { 339 // Copy the form fields. 340 std::map<base::string16, size_t> unique_names; 341 for (std::vector<FormFieldData>::const_iterator field = 342 form.fields.begin(); 343 field != form.fields.end(); ++field) { 344 if (!ShouldSkipField(*field)) { 345 // Add all supported form fields (including with empty names) to the 346 // signature. This is a requirement for Autofill servers. 347 form_signature_field_names_.append("&"); 348 form_signature_field_names_.append(StripDigitsIfRequired(field->name)); 349 350 ++active_field_count_; 351 } 352 353 // Generate a unique name for this field by appending a counter to the name. 354 // Make sure to prepend the counter with a non-numeric digit so that we are 355 // guaranteed to avoid collisions. 356 if (!unique_names.count(field->name)) 357 unique_names[field->name] = 1; 358 else 359 ++unique_names[field->name]; 360 base::string16 unique_name = field->name + base::ASCIIToUTF16("_") + 361 base::IntToString16(unique_names[field->name]); 362 fields_.push_back(new AutofillField(*field, unique_name)); 363 } 364 365 std::string method = base::UTF16ToUTF8(form.method); 366 if (StringToLowerASCII(method) == kFormMethodPost) { 367 method_ = POST; 368 } else { 369 // Either the method is 'get', or we don't know. In this case we default 370 // to GET. 371 method_ = GET; 372 } 373} 374 375FormStructure::~FormStructure() {} 376 377void FormStructure::DetermineHeuristicTypes( 378 const AutofillMetrics& metric_logger) { 379 // First, try to detect field types based on each field's |autocomplete| 380 // attribute value. If there is at least one form field that specifies an 381 // autocomplete type hint, don't try to apply other heuristics to match fields 382 // in this form. 383 bool has_author_specified_sections; 384 ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_, 385 &has_author_specified_sections); 386 387 if (!has_author_specified_types_) { 388 ServerFieldTypeMap field_type_map; 389 FormField::ParseFormFields(fields_.get(), &field_type_map); 390 for (size_t i = 0; i < field_count(); ++i) { 391 AutofillField* field = fields_[i]; 392 ServerFieldTypeMap::iterator iter = 393 field_type_map.find(field->unique_name()); 394 if (iter != field_type_map.end()) 395 field->set_heuristic_type(iter->second); 396 } 397 } 398 399 UpdateAutofillCount(); 400 IdentifySections(has_author_specified_sections); 401 402 if (IsAutofillable(true)) { 403 metric_logger.LogDeveloperEngagementMetric( 404 AutofillMetrics::FILLABLE_FORM_PARSED); 405 if (has_author_specified_types_) { 406 metric_logger.LogDeveloperEngagementMetric( 407 AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS); 408 } 409 } 410} 411 412bool FormStructure::EncodeUploadRequest( 413 const ServerFieldTypeSet& available_field_types, 414 bool form_was_autofilled, 415 std::string* encoded_xml) const { 416 DCHECK(ShouldBeCrowdsourced()); 417 418 // Verify that |available_field_types| agrees with the possible field types we 419 // are uploading. 420 for (std::vector<AutofillField*>::const_iterator field = begin(); 421 field != end(); 422 ++field) { 423 for (ServerFieldTypeSet::const_iterator type = 424 (*field)->possible_types().begin(); 425 type != (*field)->possible_types().end(); 426 ++type) { 427 DCHECK(*type == UNKNOWN_TYPE || 428 *type == EMPTY_TYPE || 429 available_field_types.count(*type)); 430 } 431 } 432 433 // Set up the <autofillupload> element and its attributes. 434 buzz::XmlElement autofill_request_xml( 435 (buzz::QName(kXMLElementAutofillUpload))); 436 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion), 437 kClientVersion); 438 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature), 439 FormSignature()); 440 autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed), 441 form_was_autofilled ? "true" : "false"); 442 autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent), 443 EncodeFieldTypes(available_field_types).c_str()); 444 445 if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml)) 446 return false; // Malformed form, skip it. 447 448 // Obtain the XML structure as a string. 449 *encoded_xml = kXMLDeclaration; 450 *encoded_xml += autofill_request_xml.Str().c_str(); 451 452 // To enable this logging, run with the flag --vmodule="form_structure=2". 453 VLOG(2) << "\n" << *encoded_xml; 454 455 return true; 456} 457 458bool FormStructure::EncodeFieldAssignments( 459 const ServerFieldTypeSet& available_field_types, 460 std::string* encoded_xml) const { 461 DCHECK(ShouldBeCrowdsourced()); 462 463 // Set up the <fieldassignments> element and its attributes. 464 buzz::XmlElement autofill_request_xml( 465 (buzz::QName(kXMLElementFieldAssignments))); 466 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature), 467 FormSignature()); 468 469 if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS, 470 &autofill_request_xml)) 471 return false; // Malformed form, skip it. 472 473 // Obtain the XML structure as a string. 474 *encoded_xml = kXMLDeclaration; 475 *encoded_xml += autofill_request_xml.Str().c_str(); 476 477 return true; 478} 479 480// static 481bool FormStructure::EncodeQueryRequest( 482 const std::vector<FormStructure*>& forms, 483 std::vector<std::string>* encoded_signatures, 484 std::string* encoded_xml) { 485 DCHECK(encoded_signatures); 486 DCHECK(encoded_xml); 487 encoded_xml->clear(); 488 encoded_signatures->clear(); 489 encoded_signatures->reserve(forms.size()); 490 491 // Set up the <autofillquery> element and attributes. 492 buzz::XmlElement autofill_request_xml( 493 (buzz::QName(kXMLElementAutofillQuery))); 494 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion), 495 kClientVersion); 496 497 // Some badly formatted web sites repeat forms - detect that and encode only 498 // one form as returned data would be the same for all the repeated forms. 499 std::set<std::string> processed_forms; 500 for (ScopedVector<FormStructure>::const_iterator it = forms.begin(); 501 it != forms.end(); 502 ++it) { 503 std::string signature((*it)->FormSignature()); 504 if (processed_forms.find(signature) != processed_forms.end()) 505 continue; 506 processed_forms.insert(signature); 507 scoped_ptr<buzz::XmlElement> encompassing_xml_element( 508 new buzz::XmlElement(buzz::QName(kXMLElementForm))); 509 encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature), 510 signature); 511 512 if (!(*it)->EncodeFormRequest(FormStructure::QUERY, 513 encompassing_xml_element.get())) 514 continue; // Malformed form, skip it. 515 516 autofill_request_xml.AddElement(encompassing_xml_element.release()); 517 encoded_signatures->push_back(signature); 518 } 519 520 if (!encoded_signatures->size()) 521 return false; 522 523 // Note: Chrome used to also set 'accepts="e"' (where 'e' is for experiments), 524 // but no longer sets this because support for experiments is deprecated. If 525 // it ever resurfaces, re-add code here to set the attribute accordingly. 526 527 // Obtain the XML structure as a string. 528 *encoded_xml = kXMLDeclaration; 529 *encoded_xml += autofill_request_xml.Str().c_str(); 530 531 return true; 532} 533 534// static 535void FormStructure::ParseQueryResponse( 536 const std::string& response_xml, 537 const std::vector<FormStructure*>& forms, 538 const AutofillMetrics& metric_logger) { 539 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED); 540 541 // Parse the field types from the server response to the query. 542 std::vector<AutofillServerFieldInfo> field_infos; 543 UploadRequired upload_required; 544 AutofillQueryXmlParser parse_handler(&field_infos, 545 &upload_required); 546 buzz::XmlParser parser(&parse_handler); 547 parser.Parse(response_xml.c_str(), response_xml.length(), true); 548 if (!parse_handler.succeeded()) 549 return; 550 551 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED); 552 553 bool heuristics_detected_fillable_field = false; 554 bool query_response_overrode_heuristics = false; 555 556 // Copy the field types into the actual form. 557 std::vector<AutofillServerFieldInfo>::iterator current_info = 558 field_infos.begin(); 559 for (std::vector<FormStructure*>::const_iterator iter = forms.begin(); 560 iter != forms.end(); ++iter) { 561 FormStructure* form = *iter; 562 form->upload_required_ = upload_required; 563 564 for (std::vector<AutofillField*>::iterator field = form->fields_.begin(); 565 field != form->fields_.end(); ++field) { 566 if (form->ShouldSkipField(**field)) 567 continue; 568 569 // In some cases *successful* response does not return all the fields. 570 // Quit the update of the types then. 571 if (current_info == field_infos.end()) 572 break; 573 574 // UNKNOWN_TYPE is reserved for use by the client. 575 DCHECK_NE(current_info->field_type, UNKNOWN_TYPE); 576 577 ServerFieldType heuristic_type = (*field)->heuristic_type(); 578 if (heuristic_type != UNKNOWN_TYPE) 579 heuristics_detected_fillable_field = true; 580 581 (*field)->set_server_type(current_info->field_type); 582 if (heuristic_type != (*field)->Type().GetStorableType()) 583 query_response_overrode_heuristics = true; 584 585 // Copy default value into the field if available. 586 if (!current_info->default_value.empty()) 587 (*field)->set_default_value(current_info->default_value); 588 589 ++current_info; 590 } 591 592 form->UpdateAutofillCount(); 593 form->IdentifySections(false); 594 } 595 596 AutofillMetrics::ServerQueryMetric metric; 597 if (query_response_overrode_heuristics) { 598 if (heuristics_detected_fillable_field) { 599 metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS; 600 } else { 601 metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS; 602 } 603 } else { 604 metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS; 605 } 606 metric_logger.LogServerQueryMetric(metric); 607} 608 609// static 610void FormStructure::GetFieldTypePredictions( 611 const std::vector<FormStructure*>& form_structures, 612 std::vector<FormDataPredictions>* forms) { 613 forms->clear(); 614 forms->reserve(form_structures.size()); 615 for (size_t i = 0; i < form_structures.size(); ++i) { 616 FormStructure* form_structure = form_structures[i]; 617 FormDataPredictions form; 618 form.data.name = form_structure->form_name_; 619 form.data.method = 620 base::ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET"); 621 form.data.origin = form_structure->source_url_; 622 form.data.action = form_structure->target_url_; 623 form.signature = form_structure->FormSignature(); 624 625 for (std::vector<AutofillField*>::const_iterator field = 626 form_structure->fields_.begin(); 627 field != form_structure->fields_.end(); ++field) { 628 form.data.fields.push_back(FormFieldData(**field)); 629 630 FormFieldDataPredictions annotated_field; 631 annotated_field.signature = (*field)->FieldSignature(); 632 annotated_field.heuristic_type = 633 AutofillType((*field)->heuristic_type()).ToString(); 634 annotated_field.server_type = 635 AutofillType((*field)->server_type()).ToString(); 636 annotated_field.overall_type = (*field)->Type().ToString(); 637 form.fields.push_back(annotated_field); 638 } 639 640 forms->push_back(form); 641 } 642} 643 644std::string FormStructure::FormSignature() const { 645 std::string scheme(target_url_.scheme()); 646 std::string host(target_url_.host()); 647 648 // If target host or scheme is empty, set scheme and host of source url. 649 // This is done to match the Toolbar's behavior. 650 if (scheme.empty() || host.empty()) { 651 scheme = source_url_.scheme(); 652 host = source_url_.host(); 653 } 654 655 std::string form_string = scheme + "://" + host + "&" + 656 base::UTF16ToUTF8(form_name_) + 657 form_signature_field_names_; 658 659 return Hash64Bit(form_string); 660} 661 662bool FormStructure::ShouldSkipField(const FormFieldData& field) const { 663 return field.is_checkable; 664} 665 666bool FormStructure::IsAutofillable(bool require_method_post) const { 667 if (autofill_count() < kRequiredAutofillFields) 668 return false; 669 670 return ShouldBeParsed(require_method_post); 671} 672 673void FormStructure::UpdateAutofillCount() { 674 autofill_count_ = 0; 675 for (std::vector<AutofillField*>::const_iterator iter = begin(); 676 iter != end(); ++iter) { 677 AutofillField* field = *iter; 678 if (field && field->IsFieldFillable()) 679 ++autofill_count_; 680 } 681} 682 683bool FormStructure::ShouldBeParsed(bool require_method_post) const { 684 if (active_field_count() < kRequiredAutofillFields) 685 return false; 686 687 // Rule out http(s)://*/search?... 688 // e.g. http://www.google.com/search?q=... 689 // http://search.yahoo.com/search?p=... 690 if (target_url_.path() == "/search") 691 return false; 692 693 bool has_text_field = false; 694 for (std::vector<AutofillField*>::const_iterator it = begin(); 695 it != end() && !has_text_field; ++it) { 696 has_text_field |= (*it)->form_control_type != "select-one"; 697 } 698 if (!has_text_field) 699 return false; 700 701 return !require_method_post || (method_ == POST); 702} 703 704bool FormStructure::ShouldBeCrowdsourced() const { 705 return !has_author_specified_types_ && ShouldBeParsed(true); 706} 707 708void FormStructure::UpdateFromCache(const FormStructure& cached_form) { 709 // Map from field signatures to cached fields. 710 std::map<std::string, const AutofillField*> cached_fields; 711 for (size_t i = 0; i < cached_form.field_count(); ++i) { 712 const AutofillField* field = cached_form.field(i); 713 cached_fields[field->FieldSignature()] = field; 714 } 715 716 for (std::vector<AutofillField*>::const_iterator iter = begin(); 717 iter != end(); ++iter) { 718 AutofillField* field = *iter; 719 720 std::map<std::string, const AutofillField*>::const_iterator 721 cached_field = cached_fields.find(field->FieldSignature()); 722 if (cached_field != cached_fields.end()) { 723 if (field->form_control_type != "select-one" && 724 field->value == cached_field->second->value) { 725 // From the perspective of learning user data, text fields containing 726 // default values are equivalent to empty fields. 727 field->value = base::string16(); 728 } 729 730 field->set_heuristic_type(cached_field->second->heuristic_type()); 731 field->set_server_type(cached_field->second->server_type()); 732 } 733 } 734 735 UpdateAutofillCount(); 736 737 // The form signature should match between query and upload requests to the 738 // server. On many websites, form elements are dynamically added, removed, or 739 // rearranged via JavaScript between page load and form submission, so we 740 // copy over the |form_signature_field_names_| corresponding to the query 741 // request. 742 DCHECK_EQ(cached_form.form_name_, form_name_); 743 DCHECK_EQ(cached_form.source_url_, source_url_); 744 DCHECK_EQ(cached_form.target_url_, target_url_); 745 form_signature_field_names_ = cached_form.form_signature_field_names_; 746} 747 748void FormStructure::LogQualityMetrics( 749 const AutofillMetrics& metric_logger, 750 const base::TimeTicks& load_time, 751 const base::TimeTicks& interaction_time, 752 const base::TimeTicks& submission_time) const { 753 size_t num_detected_field_types = 0; 754 bool did_autofill_all_possible_fields = true; 755 bool did_autofill_some_possible_fields = false; 756 for (size_t i = 0; i < field_count(); ++i) { 757 const AutofillField* field = this->field(i); 758 759 // No further logging for empty fields nor for fields where the entered data 760 // does not appear to already exist in the user's stored Autofill data. 761 const ServerFieldTypeSet& field_types = field->possible_types(); 762 DCHECK(!field_types.empty()); 763 if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE)) 764 continue; 765 766 // Similarly, no further logging for password fields. Those are primarily 767 // related to a different feature code path, and so make more sense to track 768 // outside of this metric. 769 if (field->form_control_type == "password") 770 continue; 771 772 ++num_detected_field_types; 773 if (field->is_autofilled) 774 did_autofill_some_possible_fields = true; 775 else 776 did_autofill_all_possible_fields = false; 777 778 // Collapse field types that Chrome treats as identical, e.g. home and 779 // billing address fields. 780 ServerFieldTypeSet collapsed_field_types; 781 for (ServerFieldTypeSet::const_iterator it = field_types.begin(); 782 it != field_types.end(); 783 ++it) { 784 // Since we currently only support US phone numbers, the (city code + main 785 // digits) number is almost always identical to the whole phone number. 786 // TODO(isherman): Improve this logic once we add support for 787 // international numbers. 788 if (*it == PHONE_HOME_CITY_AND_NUMBER) 789 collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER); 790 else 791 collapsed_field_types.insert(AutofillType(*it).GetStorableType()); 792 } 793 794 // Capture the field's type, if it is unambiguous. 795 ServerFieldType field_type = UNKNOWN_TYPE; 796 if (collapsed_field_types.size() == 1) 797 field_type = *collapsed_field_types.begin(); 798 799 ServerFieldType heuristic_type = 800 AutofillType(field->heuristic_type()).GetStorableType(); 801 ServerFieldType server_type = 802 AutofillType(field->server_type()).GetStorableType(); 803 ServerFieldType predicted_type = field->Type().GetStorableType(); 804 805 // Log heuristic, server, and overall type quality metrics, independently of 806 // whether the field was autofilled. 807 if (heuristic_type == UNKNOWN_TYPE) { 808 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 809 field_type); 810 } else if (field_types.count(heuristic_type)) { 811 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH, 812 field_type); 813 } else { 814 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH, 815 field_type); 816 } 817 818 if (server_type == NO_SERVER_DATA) { 819 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 820 field_type); 821 } else if (field_types.count(server_type)) { 822 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH, 823 field_type); 824 } else { 825 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH, 826 field_type); 827 } 828 829 if (predicted_type == UNKNOWN_TYPE) { 830 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN, 831 field_type); 832 } else if (field_types.count(predicted_type)) { 833 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH, 834 field_type); 835 } else { 836 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH, 837 field_type); 838 } 839 } 840 841 if (num_detected_field_types < kRequiredAutofillFields) { 842 metric_logger.LogUserHappinessMetric( 843 AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM); 844 } else { 845 if (did_autofill_all_possible_fields) { 846 metric_logger.LogUserHappinessMetric( 847 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL); 848 } else if (did_autofill_some_possible_fields) { 849 metric_logger.LogUserHappinessMetric( 850 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME); 851 } else { 852 metric_logger.LogUserHappinessMetric( 853 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE); 854 } 855 856 // Unlike the other times, the |submission_time| should always be available. 857 DCHECK(!submission_time.is_null()); 858 859 // The |load_time| might be unset, in the case that the form was dynamically 860 // added to the DOM. 861 if (!load_time.is_null()) { 862 // Submission should always chronologically follow form load. 863 DCHECK(submission_time > load_time); 864 base::TimeDelta elapsed = submission_time - load_time; 865 if (did_autofill_some_possible_fields) 866 metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed); 867 else 868 metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed); 869 } 870 871 // The |interaction_time| might be unset, in the case that the user 872 // submitted a blank form. 873 if (!interaction_time.is_null()) { 874 // Submission should always chronologically follow interaction. 875 DCHECK(submission_time > interaction_time); 876 base::TimeDelta elapsed = submission_time - interaction_time; 877 if (did_autofill_some_possible_fields) { 878 metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed); 879 } else { 880 metric_logger.LogFormFillDurationFromInteractionWithoutAutofill( 881 elapsed); 882 } 883 } 884 } 885} 886 887const AutofillField* FormStructure::field(size_t index) const { 888 if (index >= fields_.size()) { 889 NOTREACHED(); 890 return NULL; 891 } 892 893 return fields_[index]; 894} 895 896AutofillField* FormStructure::field(size_t index) { 897 return const_cast<AutofillField*>( 898 static_cast<const FormStructure*>(this)->field(index)); 899} 900 901size_t FormStructure::field_count() const { 902 return fields_.size(); 903} 904 905size_t FormStructure::active_field_count() const { 906 return active_field_count_; 907} 908 909FormData FormStructure::ToFormData() const { 910 // |data.user_submitted| will always be false. 911 FormData data; 912 data.name = form_name_; 913 data.origin = source_url_; 914 data.action = target_url_; 915 data.method = base::ASCIIToUTF16(method_ == POST ? "POST" : "GET"); 916 917 for (size_t i = 0; i < fields_.size(); ++i) { 918 data.fields.push_back(FormFieldData(*fields_[i])); 919 } 920 921 return data; 922} 923 924bool FormStructure::operator==(const FormData& form) const { 925 // TODO(jhawkins): Is this enough to differentiate a form? 926 if (form_name_ == form.name && 927 source_url_ == form.origin && 928 target_url_ == form.action) { 929 return true; 930 } 931 932 // TODO(jhawkins): Compare field names, IDs and labels once we have labels 933 // set up. 934 935 return false; 936} 937 938bool FormStructure::operator!=(const FormData& form) const { 939 return !operator==(form); 940} 941 942std::string FormStructure::Hash64Bit(const std::string& str) { 943 std::string hash_bin = base::SHA1HashString(str); 944 DCHECK_EQ(20U, hash_bin.length()); 945 946 uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) | 947 (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) | 948 (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) | 949 (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) | 950 (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) | 951 (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) | 952 (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) | 953 ((static_cast<uint64>(hash_bin[7])) & 0xFF); 954 955 return base::Uint64ToString(hash64); 956} 957 958bool FormStructure::EncodeFormRequest( 959 FormStructure::EncodeRequestType request_type, 960 buzz::XmlElement* encompassing_xml_element) const { 961 if (!field_count()) // Nothing to add. 962 return false; 963 964 // Some badly formatted web sites repeat fields - limit number of fields to 965 // 48, which is far larger than any valid form and XML still fits into 2K. 966 // Do not send requests for forms with more than this many fields, as they are 967 // near certainly not valid/auto-fillable. 968 const size_t kMaxFieldsOnTheForm = 48; 969 if (field_count() > kMaxFieldsOnTheForm) 970 return false; 971 972 // Add the child nodes for the form fields. 973 for (size_t index = 0; index < field_count(); ++index) { 974 const AutofillField* field = fields_[index]; 975 switch (request_type) { 976 case FormStructure::UPLOAD: 977 EncodeFieldForUpload(*field, encompassing_xml_element); 978 break; 979 case FormStructure::QUERY: 980 if (ShouldSkipField(*field)) 981 continue; 982 EncodeFieldForQuery(*field, encompassing_xml_element); 983 break; 984 case FormStructure::FIELD_ASSIGNMENTS: 985 EncodeFieldForFieldAssignments(*field, encompassing_xml_element); 986 break; 987 } 988 } 989 return true; 990} 991 992void FormStructure::ParseFieldTypesFromAutocompleteAttributes( 993 bool* found_types, 994 bool* found_sections) { 995 const std::string kDefaultSection = "-default"; 996 997 *found_types = false; 998 *found_sections = false; 999 for (std::vector<AutofillField*>::iterator it = fields_.begin(); 1000 it != fields_.end(); ++it) { 1001 AutofillField* field = *it; 1002 1003 // To prevent potential section name collisions, add a default suffix for 1004 // other fields. Without this, 'autocomplete' attribute values 1005 // "section--shipping street-address" and "shipping street-address" would be 1006 // parsed identically, given the section handling code below. We do this 1007 // before any validation so that fields with invalid attributes still end up 1008 // in the default section. These default section names will be overridden 1009 // by subsequent heuristic parsing steps if there are no author-specified 1010 // section names. 1011 field->set_section(kDefaultSection); 1012 1013 // Canonicalize the attribute value by trimming whitespace, collapsing 1014 // non-space characters (e.g. tab) to spaces, and converting to lowercase. 1015 std::string autocomplete_attribute = 1016 base::CollapseWhitespaceASCII(field->autocomplete_attribute, false); 1017 autocomplete_attribute = StringToLowerASCII(autocomplete_attribute); 1018 1019 // The autocomplete attribute is overloaded: it can specify either a field 1020 // type hint or whether autocomplete should be enabled at all. Ignore the 1021 // latter type of attribute value. 1022 if (autocomplete_attribute.empty() || 1023 autocomplete_attribute == "on" || 1024 autocomplete_attribute == "off") { 1025 continue; 1026 } 1027 1028 // Any other value, even it is invalid, is considered to be a type hint. 1029 // This allows a website's author to specify an attribute like 1030 // autocomplete="other" on a field to disable all Autofill heuristics for 1031 // the form. 1032 *found_types = true; 1033 1034 // Tokenize the attribute value. Per the spec, the tokens are parsed in 1035 // reverse order. 1036 std::vector<std::string> tokens; 1037 Tokenize(autocomplete_attribute, " ", &tokens); 1038 1039 // The final token must be the field type. 1040 // If it is not one of the known types, abort. 1041 DCHECK(!tokens.empty()); 1042 std::string field_type_token = tokens.back(); 1043 tokens.pop_back(); 1044 HtmlFieldType field_type = 1045 FieldTypeFromAutocompleteAttributeValue(field_type_token, *field); 1046 if (field_type == HTML_TYPE_UNKNOWN) 1047 continue; 1048 1049 // The preceding token, if any, may be a type hint. 1050 if (!tokens.empty() && IsContactTypeHint(tokens.back())) { 1051 // If it is, it must match the field type; otherwise, abort. 1052 // Note that an invalid token invalidates the entire attribute value, even 1053 // if the other tokens are valid. 1054 if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type)) 1055 continue; 1056 1057 // Chrome Autofill ignores these type hints. 1058 tokens.pop_back(); 1059 } 1060 1061 // The preceding token, if any, may be a fixed string that is either 1062 // "shipping" or "billing". Chrome Autofill treats these as implicit 1063 // section name suffixes. 1064 DCHECK_EQ(kDefaultSection, field->section()); 1065 std::string section = field->section(); 1066 HtmlFieldMode mode = HTML_MODE_NONE; 1067 if (!tokens.empty()) { 1068 if (tokens.back() == kShippingMode) 1069 mode = HTML_MODE_SHIPPING; 1070 else if (tokens.back() == kBillingMode) 1071 mode = HTML_MODE_BILLING; 1072 } 1073 1074 if (mode != HTML_MODE_NONE) { 1075 section = "-" + tokens.back(); 1076 tokens.pop_back(); 1077 } 1078 1079 // The preceding token, if any, may be a named section. 1080 const std::string kSectionPrefix = "section-"; 1081 if (!tokens.empty() && 1082 StartsWithASCII(tokens.back(), kSectionPrefix, true)) { 1083 // Prepend this section name to the suffix set in the preceding block. 1084 section = tokens.back().substr(kSectionPrefix.size()) + section; 1085 tokens.pop_back(); 1086 } 1087 1088 // No other tokens are allowed. If there are any remaining, abort. 1089 if (!tokens.empty()) 1090 continue; 1091 1092 if (section != kDefaultSection) { 1093 *found_sections = true; 1094 field->set_section(section); 1095 } 1096 1097 // No errors encountered while parsing! 1098 // Update the |field|'s type based on what was parsed from the attribute. 1099 field->SetHtmlType(field_type, mode); 1100 } 1101} 1102 1103bool FormStructure::FillFields( 1104 const std::vector<ServerFieldType>& types, 1105 const InputFieldComparator& matches, 1106 const base::Callback<base::string16(const AutofillType&)>& get_info, 1107 const std::string& app_locale) { 1108 bool filled_something = false; 1109 for (size_t i = 0; i < field_count(); ++i) { 1110 for (size_t j = 0; j < types.size(); ++j) { 1111 if (matches.Run(types[j], *field(i))) { 1112 AutofillField::FillFormField(*field(i), 1113 get_info.Run(field(i)->Type()), 1114 app_locale, 1115 field(i)); 1116 filled_something = true; 1117 break; 1118 } 1119 } 1120 } 1121 return filled_something; 1122} 1123 1124std::set<base::string16> FormStructure::PossibleValues(ServerFieldType type) { 1125 std::set<base::string16> values; 1126 AutofillType target_type(type); 1127 for (std::vector<AutofillField*>::iterator iter = fields_.begin(); 1128 iter != fields_.end(); ++iter) { 1129 AutofillField* field = *iter; 1130 if (field->Type().GetStorableType() != target_type.GetStorableType() || 1131 field->Type().group() != target_type.group()) { 1132 continue; 1133 } 1134 1135 // No option values; anything goes. 1136 if (field->option_values.empty()) 1137 return std::set<base::string16>(); 1138 1139 for (size_t i = 0; i < field->option_values.size(); ++i) { 1140 if (!field->option_values[i].empty()) 1141 values.insert(base::i18n::ToUpper(field->option_values[i])); 1142 } 1143 1144 for (size_t i = 0; i < field->option_contents.size(); ++i) { 1145 if (!field->option_contents[i].empty()) 1146 values.insert(base::i18n::ToUpper(field->option_contents[i])); 1147 } 1148 } 1149 1150 return values; 1151} 1152 1153void FormStructure::IdentifySections(bool has_author_specified_sections) { 1154 if (fields_.empty()) 1155 return; 1156 1157 if (!has_author_specified_sections) { 1158 // Name sections after the first field in the section. 1159 base::string16 current_section = fields_.front()->unique_name(); 1160 1161 // Keep track of the types we've seen in this section. 1162 std::set<ServerFieldType> seen_types; 1163 ServerFieldType previous_type = UNKNOWN_TYPE; 1164 1165 for (std::vector<AutofillField*>::iterator field = fields_.begin(); 1166 field != fields_.end(); ++field) { 1167 const ServerFieldType current_type = (*field)->Type().GetStorableType(); 1168 1169 bool already_saw_current_type = seen_types.count(current_type) > 0; 1170 1171 // Forms often ask for multiple phone numbers -- e.g. both a daytime and 1172 // evening phone number. Our phone number detection is also generally a 1173 // little off. Hence, ignore this field type as a signal here. 1174 if (AutofillType(current_type).group() == PHONE_HOME) 1175 already_saw_current_type = false; 1176 1177 // Some forms have adjacent fields of the same type. Two common examples: 1178 // * Forms with two email fields, where the second is meant to "confirm" 1179 // the first. 1180 // * Forms with a <select> menu for states in some countries, and a 1181 // freeform <input> field for states in other countries. (Usually, 1182 // only one of these two will be visible for any given choice of 1183 // country.) 1184 // Generally, adjacent fields of the same type belong in the same logical 1185 // section. 1186 if (current_type == previous_type) 1187 already_saw_current_type = false; 1188 1189 previous_type = current_type; 1190 1191 if (current_type != UNKNOWN_TYPE && already_saw_current_type) { 1192 // We reached the end of a section, so start a new section. 1193 seen_types.clear(); 1194 current_section = (*field)->unique_name(); 1195 } 1196 1197 seen_types.insert(current_type); 1198 (*field)->set_section(base::UTF16ToUTF8(current_section)); 1199 } 1200 } 1201 1202 // Ensure that credit card and address fields are in separate sections. 1203 // This simplifies the section-aware logic in autofill_manager.cc. 1204 for (std::vector<AutofillField*>::iterator field = fields_.begin(); 1205 field != fields_.end(); ++field) { 1206 FieldTypeGroup field_type_group = (*field)->Type().group(); 1207 if (field_type_group == CREDIT_CARD) 1208 (*field)->set_section((*field)->section() + "-cc"); 1209 else 1210 (*field)->set_section((*field)->section() + "-default"); 1211 } 1212} 1213 1214} // namespace autofill 1215