form_autofill_util.cc revision d0247b1b59f9c528cb6df88b4f2b9afaf80d181e
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/autofill/content/renderer/form_autofill_util.h" 6 7#include <map> 8 9#include "base/command_line.h" 10#include "base/logging.h" 11#include "base/memory/scoped_vector.h" 12#include "base/metrics/field_trial.h" 13#include "base/strings/string_util.h" 14#include "base/strings/utf_string_conversions.h" 15#include "components/autofill/core/common/autofill_switches.h" 16#include "components/autofill/core/common/form_data.h" 17#include "components/autofill/core/common/form_field_data.h" 18#include "components/autofill/core/common/web_element_descriptor.h" 19#include "third_party/WebKit/public/platform/WebString.h" 20#include "third_party/WebKit/public/platform/WebVector.h" 21#include "third_party/WebKit/public/web/WebDocument.h" 22#include "third_party/WebKit/public/web/WebElement.h" 23#include "third_party/WebKit/public/web/WebExceptionCode.h" 24#include "third_party/WebKit/public/web/WebFormControlElement.h" 25#include "third_party/WebKit/public/web/WebFormElement.h" 26#include "third_party/WebKit/public/web/WebFrame.h" 27#include "third_party/WebKit/public/web/WebInputElement.h" 28#include "third_party/WebKit/public/web/WebLabelElement.h" 29#include "third_party/WebKit/public/web/WebNode.h" 30#include "third_party/WebKit/public/web/WebNodeList.h" 31#include "third_party/WebKit/public/web/WebOptionElement.h" 32#include "third_party/WebKit/public/web/WebSelectElement.h" 33 34using WebKit::WebDocument; 35using WebKit::WebElement; 36using WebKit::WebExceptionCode; 37using WebKit::WebFormControlElement; 38using WebKit::WebFormElement; 39using WebKit::WebFrame; 40using WebKit::WebInputElement; 41using WebKit::WebLabelElement; 42using WebKit::WebNode; 43using WebKit::WebNodeList; 44using WebKit::WebOptionElement; 45using WebKit::WebSelectElement; 46using WebKit::WebString; 47using 
WebKit::WebVector; 48 49namespace autofill { 50namespace { 51 52// The maximum length allowed for form data. 53const size_t kMaxDataLength = 1024; 54 55// A bit field mask for FillForm functions to not fill some fields. 56enum FieldFilterMask { 57 FILTER_NONE = 0, 58 FILTER_DISABLED_ELEMENTS = 1 << 0, 59 FILTER_READONLY_ELEMENTS = 1 << 1, 60 FILTER_NON_FOCUSABLE_ELEMENTS = 1 << 2, 61 FILTER_ALL_NON_EDITIABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS | 62 FILTER_READONLY_ELEMENTS | 63 FILTER_NON_FOCUSABLE_ELEMENTS, 64}; 65 66bool IsOptionElement(const WebElement& element) { 67 CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option")); 68 return element.hasTagName(kOption); 69} 70 71bool IsScriptElement(const WebElement& element) { 72 CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script")); 73 return element.hasTagName(kScript); 74} 75 76bool IsNoScriptElement(const WebElement& element) { 77 CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript")); 78 return element.hasTagName(kNoScript); 79} 80 81bool HasTagName(const WebNode& node, const WebKit::WebString& tag) { 82 return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag); 83} 84 85bool IsAutofillableElement(const WebFormControlElement& element) { 86 const WebInputElement* input_element = toWebInputElement(&element); 87 return IsAutofillableInputElement(input_element) || IsSelectElement(element); 88} 89 90// Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement. 91bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) { 92 return input_element.autoComplete(); 93} 94 95// Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed 96// to a single space. If |force_whitespace| is true, then the resulting string 97// is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the 98// result includes a space only if |prefix| has trailing whitespace or |suffix| 99// has leading whitespace. 
100// A few examples: 101// * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar" 102// * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar" 103// * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar" 104// * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar" 105// * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar" 106// * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar" 107// * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar " 108// * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar " 109const base::string16 CombineAndCollapseWhitespace( 110 const base::string16& prefix, 111 const base::string16& suffix, 112 bool force_whitespace) { 113 base::string16 prefix_trimmed; 114 TrimPositions prefix_trailing_whitespace = 115 TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed); 116 117 // Recursively compute the children's text. 118 base::string16 suffix_trimmed; 119 TrimPositions suffix_leading_whitespace = 120 TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed); 121 122 if (prefix_trailing_whitespace || suffix_leading_whitespace || 123 force_whitespace) { 124 return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed; 125 } else { 126 return prefix_trimmed + suffix_trimmed; 127 } 128} 129 130// This is a helper function for the FindChildText() function (see below). 131// Search depth is limited with the |depth| parameter. 132base::string16 FindChildTextInner(const WebNode& node, int depth) { 133 if (depth <= 0 || node.isNull()) 134 return base::string16(); 135 136 // Skip over comments. 137 if (node.nodeType() == WebNode::CommentNode) 138 return FindChildTextInner(node.nextSibling(), depth - 1); 139 140 if (node.nodeType() != WebNode::ElementNode && 141 node.nodeType() != WebNode::TextNode) 142 return base::string16(); 143 144 // Ignore elements known not to contain inferable labels. 
145 if (node.isElementNode()) { 146 const WebElement element = node.toConst<WebElement>(); 147 if (IsOptionElement(element) || 148 IsScriptElement(element) || 149 IsNoScriptElement(element) || 150 (element.isFormControlElement() && 151 IsAutofillableElement(element.toConst<WebFormControlElement>()))) { 152 return base::string16(); 153 } 154 } 155 156 // Extract the text exactly at this node. 157 base::string16 node_text = node.nodeValue(); 158 159 // Recursively compute the children's text. 160 // Preserve inter-element whitespace separation. 161 base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1); 162 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); 163 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space); 164 165 // Recursively compute the siblings' text. 166 // Again, preserve inter-element whitespace separation. 167 base::string16 sibling_text = 168 FindChildTextInner(node.nextSibling(), depth - 1); 169 add_space = node.nodeType() == WebNode::TextNode && node_text.empty(); 170 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space); 171 172 return node_text; 173} 174 175// Returns the aggregated values of the descendants of |element| that are 176// non-empty text nodes. This is a faster alternative to |innerText()| for 177// performance critical operations. It does a full depth-first search so can be 178// used when the structure is not directly known. However, unlike with 179// |innerText()|, the search depth and breadth are limited to a fixed threshold. 180// Whitespace is trimmed from text accumulated at descendant nodes. 
181base::string16 FindChildText(const WebNode& node) { 182 if (node.isTextNode()) 183 return node.nodeValue(); 184 185 WebNode child = node.firstChild(); 186 187 const int kChildSearchDepth = 10; 188 base::string16 node_text = FindChildTextInner(child, kChildSearchDepth); 189 TrimWhitespace(node_text, TRIM_ALL, &node_text); 190 return node_text; 191} 192 193// Helper for |InferLabelForElement()| that infers a label, if possible, from 194// a previous sibling of |element|, 195// e.g. Some Text <input ...> 196// or Some <span>Text</span> <input ...> 197// or <p>Some Text</p><input ...> 198// or <label>Some Text</label> <input ...> 199// or Some Text <img><input ...> 200// or <b>Some Text</b><br/> <input ...>. 201base::string16 InferLabelFromPrevious(const WebFormControlElement& element) { 202 base::string16 inferred_label; 203 WebNode previous = element; 204 while (true) { 205 previous = previous.previousSibling(); 206 if (previous.isNull()) 207 break; 208 209 // Skip over comments. 210 WebNode::NodeType node_type = previous.nodeType(); 211 if (node_type == WebNode::CommentNode) 212 continue; 213 214 // Otherwise, only consider normal HTML elements and their contents. 215 if (node_type != WebNode::TextNode && 216 node_type != WebNode::ElementNode) 217 break; 218 219 // A label might be split across multiple "lightweight" nodes. 220 // Coalesce any text contained in multiple consecutive 221 // (a) plain text nodes or 222 // (b) inline HTML elements that are essentially equivalent to text nodes. 
223 CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b")); 224 CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong")); 225 CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span")); 226 CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font")); 227 if (previous.isTextNode() || 228 HasTagName(previous, kBold) || HasTagName(previous, kStrong) || 229 HasTagName(previous, kSpan) || HasTagName(previous, kFont)) { 230 base::string16 value = FindChildText(previous); 231 // A text node's value will be empty if it is for a line break. 232 bool add_space = previous.isTextNode() && value.empty(); 233 inferred_label = 234 CombineAndCollapseWhitespace(value, inferred_label, add_space); 235 continue; 236 } 237 238 // If we have identified a partial label and have reached a non-lightweight 239 // element, consider the label to be complete. 240 base::string16 trimmed_label; 241 TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label); 242 if (!trimmed_label.empty()) 243 break; 244 245 // <img> and <br> tags often appear between the input element and its 246 // label text, so skip over them. 247 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img")); 248 CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br")); 249 if (HasTagName(previous, kImage) || HasTagName(previous, kBreak)) 250 continue; 251 252 // We only expect <p> and <label> tags to contain the full label text. 253 CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p")); 254 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label")); 255 if (HasTagName(previous, kPage) || HasTagName(previous, kLabel)) 256 inferred_label = FindChildText(previous); 257 258 break; 259 } 260 261 TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label); 262 return inferred_label; 263} 264 265// Helper for |InferLabelForElement()| that infers a label, if possible, from 266// enclosing list item, 267// e.g. 
<li>Some Text<input ...><input ...><input ...></tr> 268base::string16 InferLabelFromListItem(const WebFormControlElement& element) { 269 WebNode parent = element.parentNode(); 270 CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li")); 271 while (!parent.isNull() && parent.isElementNode() && 272 !parent.to<WebElement>().hasTagName(kListItem)) { 273 parent = parent.parentNode(); 274 } 275 276 if (!parent.isNull() && HasTagName(parent, kListItem)) 277 return FindChildText(parent); 278 279 return base::string16(); 280} 281 282// Helper for |InferLabelForElement()| that infers a label, if possible, from 283// surrounding table structure, 284// e.g. <tr><td>Some Text</td><td><input ...></td></tr> 285// or <tr><th>Some Text</th><td><input ...></td></tr> 286// or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr> 287// or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr> 288base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) { 289 CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td")); 290 WebNode parent = element.parentNode(); 291 while (!parent.isNull() && parent.isElementNode() && 292 !parent.to<WebElement>().hasTagName(kTableCell)) { 293 parent = parent.parentNode(); 294 } 295 296 if (parent.isNull()) 297 return base::string16(); 298 299 // Check all previous siblings, skipping non-element nodes, until we find a 300 // non-empty text block. 301 base::string16 inferred_label; 302 WebNode previous = parent.previousSibling(); 303 CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th")); 304 while (inferred_label.empty() && !previous.isNull()) { 305 if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader)) 306 inferred_label = FindChildText(previous); 307 308 previous = previous.previousSibling(); 309 } 310 311 return inferred_label; 312} 313 314// Helper for |InferLabelForElement()| that infers a label, if possible, from 315// surrounding table structure, 316// e.g. 
<tr><td>Some Text</td></tr><tr><td><input ...></td></tr> 317base::string16 InferLabelFromTableRow(const WebFormControlElement& element) { 318 CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr")); 319 WebNode parent = element.parentNode(); 320 while (!parent.isNull() && parent.isElementNode() && 321 !parent.to<WebElement>().hasTagName(kTableRow)) { 322 parent = parent.parentNode(); 323 } 324 325 if (parent.isNull()) 326 return base::string16(); 327 328 // Check all previous siblings, skipping non-element nodes, until we find a 329 // non-empty text block. 330 base::string16 inferred_label; 331 WebNode previous = parent.previousSibling(); 332 while (inferred_label.empty() && !previous.isNull()) { 333 if (HasTagName(previous, kTableRow)) 334 inferred_label = FindChildText(previous); 335 336 previous = previous.previousSibling(); 337 } 338 339 return inferred_label; 340} 341 342// Helper for |InferLabelForElement()| that infers a label, if possible, from 343// a surrounding div table, 344// e.g. <div>Some Text<span><input ...></span></div> 345// e.g. <div>Some Text</div><div><input ...></div> 346base::string16 InferLabelFromDivTable(const WebFormControlElement& element) { 347 WebNode node = element.parentNode(); 348 bool looking_for_parent = true; 349 350 // Search the sibling and parent <div>s until we find a candidate label. 351 base::string16 inferred_label; 352 CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div")); 353 CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table")); 354 CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset")); 355 while (inferred_label.empty() && !node.isNull()) { 356 if (HasTagName(node, kDiv)) { 357 looking_for_parent = false; 358 inferred_label = FindChildText(node); 359 } else if (looking_for_parent && 360 (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) { 361 // If the element is in a table or fieldset, its label most likely is too. 
362 break; 363 } 364 365 if (node.previousSibling().isNull()) { 366 // If there are no more siblings, continue walking up the tree. 367 looking_for_parent = true; 368 } 369 370 if (looking_for_parent) 371 node = node.parentNode(); 372 else 373 node = node.previousSibling(); 374 } 375 376 return inferred_label; 377} 378 379// Helper for |InferLabelForElement()| that infers a label, if possible, from 380// a surrounding definition list, 381// e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl> 382// e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl> 383base::string16 InferLabelFromDefinitionList( 384 const WebFormControlElement& element) { 385 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd")); 386 WebNode parent = element.parentNode(); 387 while (!parent.isNull() && parent.isElementNode() && 388 !parent.to<WebElement>().hasTagName(kDefinitionData)) 389 parent = parent.parentNode(); 390 391 if (parent.isNull() || !HasTagName(parent, kDefinitionData)) 392 return base::string16(); 393 394 // Skip by any intervening text nodes. 395 WebNode previous = parent.previousSibling(); 396 while (!previous.isNull() && previous.isTextNode()) 397 previous = previous.previousSibling(); 398 399 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt")); 400 if (previous.isNull() || !HasTagName(previous, kDefinitionTag)) 401 return base::string16(); 402 403 return FindChildText(previous); 404} 405 406// Infers corresponding label for |element| from surrounding context in the DOM, 407// e.g. the contents of the preceding <p> tag or text element. 408base::string16 InferLabelForElement(const WebFormControlElement& element) { 409 base::string16 inferred_label = InferLabelFromPrevious(element); 410 if (!inferred_label.empty()) 411 return inferred_label; 412 413 // If we didn't find a label, check for list item case. 
414 inferred_label = InferLabelFromListItem(element); 415 if (!inferred_label.empty()) 416 return inferred_label; 417 418 // If we didn't find a label, check for table cell case. 419 inferred_label = InferLabelFromTableColumn(element); 420 if (!inferred_label.empty()) 421 return inferred_label; 422 423 // If we didn't find a label, check for table row case. 424 inferred_label = InferLabelFromTableRow(element); 425 if (!inferred_label.empty()) 426 return inferred_label; 427 428 // If we didn't find a label, check for definition list case. 429 inferred_label = InferLabelFromDefinitionList(element); 430 if (!inferred_label.empty()) 431 return inferred_label; 432 433 // If we didn't find a label, check for div table case. 434 return InferLabelFromDivTable(element); 435} 436 437// Fills |option_strings| with the values of the <option> elements present in 438// |select_element|. 439void GetOptionStringsFromElement(const WebSelectElement& select_element, 440 std::vector<base::string16>* option_values, 441 std::vector<base::string16>* option_contents) { 442 DCHECK(!select_element.isNull()); 443 444 option_values->clear(); 445 option_contents->clear(); 446 WebVector<WebElement> list_items = select_element.listItems(); 447 option_values->reserve(list_items.size()); 448 option_contents->reserve(list_items.size()); 449 for (size_t i = 0; i < list_items.size(); ++i) { 450 if (IsOptionElement(list_items[i])) { 451 const WebOptionElement option = list_items[i].toConst<WebOptionElement>(); 452 option_values->push_back(option.value()); 453 option_contents->push_back(option.text()); 454 } 455 } 456} 457 458// The callback type used by |ForEachMatchingFormField()|. 459typedef void (*Callback)(const FormFieldData&, 460 bool, /* is_initiating_element */ 461 WebKit::WebFormControlElement*); 462 463// For each autofillable field in |data| that matches a field in the |form|, 464// the |callback| is invoked with the corresponding |form| field data. 
465void ForEachMatchingFormField(const WebFormElement& form_element, 466 const WebElement& initiating_element, 467 const FormData& data, 468 FieldFilterMask filters, 469 bool force_override, 470 Callback callback) { 471 std::vector<WebFormControlElement> control_elements; 472 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE, 473 &control_elements); 474 475 if (control_elements.size() != data.fields.size()) { 476 // This case should be reachable only for pathological websites and tests, 477 // which add or remove form fields while the user is interacting with the 478 // Autofill popup. 479 return; 480 } 481 482 // It's possible that the site has injected fields into the form after the 483 // page has loaded, so we can't assert that the size of the cached control 484 // elements is equal to the size of the fields in |form|. Fortunately, the 485 // one case in the wild where this happens, paypal.com signup form, the fields 486 // are appended to the end of the form and are not visible. 487 for (size_t i = 0; i < control_elements.size(); ++i) { 488 WebFormControlElement* element = &control_elements[i]; 489 490 if (base::string16(element->nameForAutofill()) != data.fields[i].name) { 491 // This case should be reachable only for pathological websites, which 492 // rename form fields while the user is interacting with the Autofill 493 // popup. I (isherman) am not aware of any such websites, and so am 494 // optimistically including a NOTREACHED(). If you ever trip this check, 495 // please file a bug against me. 496 NOTREACHED(); 497 continue; 498 } 499 500 bool is_initiating_element = (*element == initiating_element); 501 502 // Only autofill empty fields and the field that initiated the filling, 503 // i.e. the field the user is currently editing and interacting with. 
504 const WebInputElement* input_element = toWebInputElement(element); 505 if (!force_override && IsTextInput(input_element) && 506 !is_initiating_element && !input_element->value().isEmpty()) 507 continue; 508 509 if (((filters & FILTER_DISABLED_ELEMENTS) && !element->isEnabled()) || 510 ((filters & FILTER_READONLY_ELEMENTS) && element->isReadOnly()) || 511 ((filters & FILTER_NON_FOCUSABLE_ELEMENTS) && !element->isFocusable())) 512 continue; 513 514 callback(data.fields[i], is_initiating_element, element); 515 } 516} 517 518// Sets the |field|'s value to the value in |data|. 519// Also sets the "autofilled" attribute, causing the background to be yellow. 520void FillFormField(const FormFieldData& data, 521 bool is_initiating_node, 522 WebKit::WebFormControlElement* field) { 523 // Nothing to fill. 524 if (data.value.empty()) 525 return; 526 527 WebInputElement* input_element = toWebInputElement(field); 528 if (IsTextInput(input_element)) { 529 // If the maxlength attribute contains a negative value, maxLength() 530 // returns the default maxlength value. 531 input_element->setValue( 532 data.value.substr(0, input_element->maxLength()), true); 533 input_element->setAutofilled(true); 534 if (is_initiating_node) { 535 int length = input_element->value().length(); 536 input_element->setSelectionRange(length, length); 537 // Clear the current IME composition (the underline), if there is one. 538 input_element->document().frame()->unmarkText(); 539 } 540 } else if (IsSelectElement(*field)) { 541 WebSelectElement select_element = field->to<WebSelectElement>(); 542 if (select_element.value() != data.value) { 543 select_element.setValue(data.value); 544 select_element.dispatchFormControlChangeEvent(); 545 } 546 } else { 547 DCHECK(IsCheckableElement(input_element)); 548 input_element->setChecked(data.is_checked, true); 549 } 550} 551 552// Sets the |field|'s "suggested" (non JS visible) value to the value in |data|. 
553// Also sets the "autofilled" attribute, causing the background to be yellow. 554void PreviewFormField(const FormFieldData& data, 555 bool is_initiating_node, 556 WebKit::WebFormControlElement* field) { 557 // Nothing to preview. 558 if (data.value.empty()) 559 return; 560 561 // Only preview input fields. Excludes checkboxes and radio buttons, as there 562 // is no provision for setSuggestedCheckedValue in WebInputElement. 563 WebInputElement* input_element = toWebInputElement(field); 564 if (!IsTextInput(input_element)) 565 return; 566 567 // If the maxlength attribute contains a negative value, maxLength() 568 // returns the default maxlength value. 569 input_element->setSuggestedValue( 570 data.value.substr(0, input_element->maxLength())); 571 input_element->setAutofilled(true); 572 if (is_initiating_node) { 573 // Select the part of the text that the user didn't type. 574 input_element->setSelectionRange(input_element->value().length(), 575 input_element->suggestedValue().length()); 576 } 577} 578 579std::string RetrievalMethodToString( 580 const WebElementDescriptor::RetrievalMethod& method) { 581 switch (method) { 582 case WebElementDescriptor::CSS_SELECTOR: 583 return "CSS_SELECTOR"; 584 case WebElementDescriptor::ID: 585 return "ID"; 586 case WebElementDescriptor::NONE: 587 return "NONE"; 588 } 589 NOTREACHED(); 590 return "UNKNOWN"; 591} 592 593// Recursively checks whether |node| or any of its children have a non-empty 594// bounding box. The recursion depth is bounded by |depth|. 595bool IsWebNodeVisibleImpl(const WebKit::WebNode& node, const int depth) { 596 if (depth < 0) 597 return false; 598 if (node.hasNonEmptyBoundingBox()) 599 return true; 600 601 // The childNodes method is not a const method. Therefore it cannot be called 602 // on a const reference. Therefore we need a const cast. 
603 const WebKit::WebNodeList& children = 604 const_cast<WebKit::WebNode&>(node).childNodes(); 605 size_t length = children.length(); 606 for (size_t i = 0; i < length; ++i) { 607 const WebKit::WebNode& item = children.item(i); 608 if (IsWebNodeVisibleImpl(item, depth - 1)) 609 return true; 610 } 611 return false; 612} 613 614} // namespace 615 616const size_t kMaxParseableFields = 200; 617 618// All text fields, including password fields, should be extracted. 619bool IsTextInput(const WebInputElement* element) { 620 return element && element->isTextField(); 621} 622 623bool IsSelectElement(const WebFormControlElement& element) { 624 // Is static for improving performance. 625 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one")); 626 return element.formControlType() == kSelectOne; 627} 628 629bool IsCheckableElement(const WebInputElement* element) { 630 if (!element) 631 return false; 632 633 return element->isCheckbox() || element->isRadioButton(); 634} 635 636bool IsAutofillableInputElement(const WebInputElement* element) { 637 return IsTextInput(element) || IsCheckableElement(element); 638} 639 640const base::string16 GetFormIdentifier(const WebFormElement& form) { 641 base::string16 identifier = form.name(); 642 CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id")); 643 if (identifier.empty()) 644 identifier = form.getAttribute(kId); 645 646 return identifier; 647} 648 649bool IsWebNodeVisible(const WebKit::WebNode& node) { 650 // In the bug http://crbug.com/237216 the form's bounding box is empty 651 // however the form has non empty children. Thus we need to look at the 652 // form's children. 
653 int kNodeSearchDepth = 2; 654 return IsWebNodeVisibleImpl(node, kNodeSearchDepth); 655} 656 657bool ClickElement(const WebDocument& document, 658 const WebElementDescriptor& element_descriptor) { 659 WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor); 660 WebKit::WebElement element; 661 662 switch (element_descriptor.retrieval_method) { 663 case WebElementDescriptor::CSS_SELECTOR: { 664 WebExceptionCode ec = 0; 665 element = document.querySelector(web_descriptor, ec); 666 if (ec) 667 DVLOG(1) << "Query selector failed. Error code: " << ec << "."; 668 break; 669 } 670 case WebElementDescriptor::ID: 671 element = document.getElementById(web_descriptor); 672 break; 673 case WebElementDescriptor::NONE: 674 return true; 675 } 676 677 if (element.isNull()) { 678 DVLOG(1) << "Could not find " 679 << element_descriptor.descriptor 680 << " by " 681 << RetrievalMethodToString(element_descriptor.retrieval_method) 682 << "."; 683 return false; 684 } 685 686 element.simulateClick(); 687 return true; 688} 689 690// Fills |autofillable_elements| with all the auto-fillable form control 691// elements in |form_element|. 692void ExtractAutofillableElements( 693 const WebFormElement& form_element, 694 RequirementsMask requirements, 695 std::vector<WebFormControlElement>* autofillable_elements) { 696 WebVector<WebFormControlElement> control_elements; 697 form_element.getFormControlElements(control_elements); 698 699 autofillable_elements->clear(); 700 for (size_t i = 0; i < control_elements.size(); ++i) { 701 WebFormControlElement element = control_elements[i]; 702 if (!IsAutofillableElement(element)) 703 continue; 704 705 if (requirements & REQUIRE_AUTOCOMPLETE) { 706 // TODO(jhawkins): WebKit currently doesn't handle the autocomplete 707 // attribute for select control elements, but it probably should. 
708 WebInputElement* input_element = toWebInputElement(&control_elements[i]); 709 if (IsAutofillableInputElement(input_element) && 710 !SatisfiesRequireAutocomplete(*input_element)) 711 continue; 712 } 713 714 autofillable_elements->push_back(element); 715 } 716} 717 718void WebFormControlElementToFormField(const WebFormControlElement& element, 719 ExtractMask extract_mask, 720 FormFieldData* field) { 721 DCHECK(field); 722 DCHECK(!element.isNull()); 723 CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete")); 724 725 // The label is not officially part of a WebFormControlElement; however, the 726 // labels for all form control elements are scraped from the DOM and set in 727 // WebFormElementToFormData. 728 field->name = element.nameForAutofill(); 729 field->form_control_type = UTF16ToUTF8(element.formControlType()); 730 field->autocomplete_attribute = 731 UTF16ToUTF8(element.getAttribute(kAutocomplete)); 732 if (field->autocomplete_attribute.size() > kMaxDataLength) { 733 // Discard overly long attribute values to avoid DOS-ing the browser 734 // process. However, send over a default string to indicate that the 735 // attribute was present. 736 field->autocomplete_attribute = "x-max-data-length-exceeded"; 737 } 738 739 if (!IsAutofillableElement(element)) 740 return; 741 742 const WebInputElement* input_element = toWebInputElement(&element); 743 if (IsAutofillableInputElement(input_element)) { 744 if (IsTextInput(input_element)) 745 field->max_length = input_element->maxLength(); 746 747 field->is_autofilled = input_element->isAutofilled(); 748 field->is_focusable = input_element->isFocusable(); 749 field->is_checkable = IsCheckableElement(input_element); 750 field->is_checked = input_element->isChecked(); 751 field->should_autocomplete = input_element->autoComplete(); 752 field->text_direction = input_element->directionForFormData() == "rtl" ? 
753 base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT; 754 } else if (extract_mask & EXTRACT_OPTIONS) { 755 // Set option strings on the field if available. 756 DCHECK(IsSelectElement(element)); 757 const WebSelectElement select_element = element.toConst<WebSelectElement>(); 758 GetOptionStringsFromElement(select_element, 759 &field->option_values, 760 &field->option_contents); 761 } 762 763 if (!(extract_mask & EXTRACT_VALUE)) 764 return; 765 766 base::string16 value; 767 if (IsAutofillableInputElement(input_element)) { 768 value = input_element->value(); 769 } else { 770 DCHECK(IsSelectElement(element)); 771 const WebSelectElement select_element = element.toConst<WebSelectElement>(); 772 value = select_element.value(); 773 774 // Convert the |select_element| value to text if requested. 775 if (extract_mask & EXTRACT_OPTION_TEXT) { 776 WebVector<WebElement> list_items = select_element.listItems(); 777 for (size_t i = 0; i < list_items.size(); ++i) { 778 if (IsOptionElement(list_items[i])) { 779 const WebOptionElement option_element = 780 list_items[i].toConst<WebOptionElement>(); 781 if (option_element.value() == value) { 782 value = option_element.text(); 783 break; 784 } 785 } 786 } 787 } 788 } 789 790 // Constrain the maximum data length to prevent a malicious site from DOS'ing 791 // the browser: http://crbug.com/49332 792 if (value.size() > kMaxDataLength) 793 value = value.substr(0, kMaxDataLength); 794 795 field->value = value; 796}

// Fills |form| with the data of |form_element| and of every control in it that
// passes the checks selected by |requirements|; |extract_mask| is forwarded to
// WebFormControlElementToFormField() for each control. When |field| is
// non-null and an extracted control compares equal to |form_control_element|,
// that control's data is also copied into |*field|.
//
// Returns false when the form's document has no frame, when
// REQUIRE_AUTOCOMPLETE is requested and the form reports autoComplete() as
// false, or when the number of extracted fields is zero or greater than
// kMaxParseableFields. In the last case the form-level members of |form|
// (name, method, origin, action, user_submitted) have already been written.
bool WebFormElementToFormData(
    const WebKit::WebFormElement& form_element,
    const WebKit::WebFormControlElement& form_control_element,
    RequirementsMask requirements,
    ExtractMask extract_mask,
    FormData* form,
    FormFieldData* field) {
  CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
  CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
  CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));

  const WebFrame* frame = form_element.document().frame();
  if (!frame)
    return false;

  const bool require_autocomplete =
      (requirements & REQUIRE_AUTOCOMPLETE) != 0;
  if (require_autocomplete && !form_element.autoComplete())
    return false;

  form->name = GetFormIdentifier(form_element);
  form->method = form_element.method();
  form->origin = frame->document().url();
  form->action = frame->document().completeURL(form_element.action());
  form->user_submitted = form_element.wasUserSubmitted();

  // Fall back to the raw action attribute when completing it against the
  // document did not produce a valid URL.
  if (!form->action.is_valid())
    form->action = GURL(form_element.action());

  // Lookup from a field's name to its extracted data. The pointers are owned
  // by |extracted_fields|.
  std::map<base::string16, FormFieldData*> fields_by_name;
  ScopedVector<FormFieldData> extracted_fields;

  WebVector<WebFormControlElement> controls;
  form_element.getFormControlElements(controls);

  // was_extracted[i] records whether controls[i] produced an entry in
  // |extracted_fields|.
  std::vector<bool> was_extracted(controls.size(), false);

  for (size_t i = 0; i < controls.size(); ++i) {
    const WebFormControlElement& control = controls[i];
    if (!IsAutofillableElement(control))
      continue;

    const WebInputElement* input = toWebInputElement(&control);
    const bool fails_autocomplete_requirement =
        require_autocomplete &&
        IsAutofillableInputElement(input) &&
        !SatisfiesRequireAutocomplete(*input);
    if (fails_autocomplete_requirement)
      continue;

    FormFieldData* extracted = new FormFieldData;
    WebFormControlElementToFormField(control, extract_mask, extracted);
    extracted_fields.push_back(extracted);
    // TODO(jhawkins): A label element is mapped to a form control element's
    // id. field->name() will contain the id only if the name does not exist.
    // Add an id() method to WebFormControlElement and use that here.
    fields_by_name[extracted->name] = extracted;
    was_extracted[i] = true;
  }

  // Give up if nothing was extracted, or if the form has more fields than we
  // are willing to process.
  if (extracted_fields.empty() ||
      extracted_fields.size() > kMaxParseableFields) {
    return false;
  }

  // First labelling pass: walk the <label> elements under the form, resolve
  // each one to a field name and append the label's child text to the field
  // registered under that name.
  WebNodeList labels = form_element.getElementsByTagName(kLabel);
  for (unsigned i = 0; i < labels.length(); ++i) {
    WebLabelElement label = labels.item(i).to<WebLabelElement>();
    WebFormControlElement labelled_control =
        label.correspondingControl().to<WebFormControlElement>();

    base::string16 lookup_name;
    if (labelled_control.isNull()) {
      // Site authors sometimes put the control's name, not its id, in the
      // "for" attribute; use the attribute value as the lookup key.
      lookup_name = label.getAttribute(kFor);
    } else {
      if (!labelled_control.isFormControlElement() ||
          labelled_control.formControlType() == kHidden) {
        continue;
      }
      lookup_name = labelled_control.nameForAutofill();
    }

    std::map<base::string16, FormFieldData*>::iterator match =
        fields_by_name.find(lookup_name);
    if (match == fields_by_name.end())
      continue;

    FormFieldData* labelled_field = match->second;
    base::string16 label_text = FindChildText(label);

    // A field can have several label candidates; join them with a space.
    if (!labelled_field->label.empty() && !label_text.empty())
      labelled_field->label += ASCIIToUTF16(" ");
    labelled_field->label += label_text;
  }

  // Second pass: walk the controls in step with |extracted_fields|, using
  // |was_extracted| to skip controls that produced no field. Infer a label
  // for any field that is still unlabelled and report the field matching
  // |form_control_element| through |field|.
  size_t field_idx = 0;
  for (size_t i = 0;
       i < controls.size() && field_idx < extracted_fields.size(); ++i) {
    if (!was_extracted[i])
      continue;

    const WebFormControlElement& control = controls[i];
    FormFieldData* current = extracted_fields[field_idx];
    if (current->label.empty())
      current->label = InferLabelForElement(control);

    if (field && form_control_element == control)
      *field = *current;

    ++field_idx;
  }

  // Hand copies of the extracted fields to the caller's FormData.
  for (size_t k = 0; k < extracted_fields.size(); ++k)
    form->fields.push_back(*extracted_fields[k]);

  return true;
}

// Extracts the form that owns |element| into |form| and the data for
// |element| itself into |field|, using EXTRACT_VALUE | EXTRACT_OPTIONS.
// Returns false if |element| is not autofillable or has no owning form;
// otherwise returns the result of WebFormElementToFormData().
bool FindFormAndFieldForInputElement(const WebInputElement& element,
                                     FormData* form,
                                     FormFieldData* field,
                                     RequirementsMask requirements) {
  if (!IsAutofillableElement(element))
    return false;

  const WebFormElement owning_form = element.form();
  if (owning_form.isNull())
    return false;

  ExtractMask value_and_options =
      static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
  return WebFormElementToFormData(owning_form,
                                  element,
                                  requirements,
                                  value_and_options,
                                  form,
                                  field);
}

// Runs FillFormField over the fields of |element|'s form that match |form|,
// with the FILTER_ALL_NON_EDITIABLE_ELEMENTS filter and without forcing
// override. Does nothing if |element| has no owning form.
void FillForm(const FormData& form, const WebInputElement& element) {
  WebFormElement owning_form = element.form();
  if (owning_form.isNull())
    return;

  ForEachMatchingFormField(owning_form,
                           element,
                           form,
                           FILTER_ALL_NON_EDITIABLE_ELEMENTS,
                           false,  // Do not force override.
                           &FillFormField);
}

// Runs FillFormField over the fields of |form_element| that match
// |form_data|, filtering only disabled and read-only elements and forcing
// override. No initiating element is supplied. Does nothing for a null form.
void FillFormIncludingNonFocusableElements(const FormData& form_data,
                                           const WebFormElement& form_element) {
  if (form_element.isNull())
    return;

  FieldFilterMask disabled_or_readonly = static_cast<FieldFilterMask>(
      FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS);
  ForEachMatchingFormField(form_element,
                           WebInputElement(),
                           form_data,
                           disabled_or_readonly,
                           true,  // Force override.
                           &FillFormField);
}

// Runs FillFormField over the fields of |form_element| that match
// |form_data| with FILTER_NONE and forcing override. No initiating element is
// supplied. Does nothing for a null form.
void FillFormForAllElements(const FormData& form_data,
                            const WebFormElement& form_element) {
  if (form_element.isNull())
    return;

  ForEachMatchingFormField(form_element,
                           WebInputElement(),
                           form_data,
                           FILTER_NONE,
                           true,  // Force override.
                           &FillFormField);
}

// Same traversal as FillForm(), but applies PreviewFormField to each matching
// field. Does nothing if |element| has no owning form.
void PreviewForm(const FormData& form, const WebInputElement& element) {
  WebFormElement owning_form = element.form();
  if (owning_form.isNull())
    return;

  ForEachMatchingFormField(owning_form,
                           element,
                           form,
                           FILTER_ALL_NON_EDITIABLE_ELEMENTS,
                           false,  // Do not force override.
                           &PreviewFormField);
}

// Clears the suggested value of every previewed text input in |element|'s
// form. The autofilled flag of |element| itself is set to |was_autofilled|;
// for every other cleared input it is set to false. Returns false if
// |element| has no owning form, true otherwise.
bool ClearPreviewedFormWithElement(const WebInputElement& element,
                                   bool was_autofilled) {
  WebFormElement owning_form = element.form();
  if (owning_form.isNull())
    return false;

  std::vector<WebFormControlElement> controls;
  ExtractAutofillableElements(owning_form, REQUIRE_AUTOCOMPLETE, &controls);

  for (size_t i = 0; i < controls.size(); ++i) {
    WebInputElement* input = toWebInputElement(&controls[i]);

    // Skip, in this order:
    //  - anything that is not a text input, since only those are previewed;
    //  - inputs that are not marked autofilled, since they were not previewed;
    //  - inputs with no suggested value. These may be unrelated fields the
    //    user filled earlier (e.g. the address section while the credit card
    //    section is being previewed) and must keep their autofilled state.
    if (!IsTextInput(input) ||
        !input->isAutofilled() ||
        input->suggestedValue().isEmpty()) {
      continue;
    }

    // Drop the preview, then restore the autofilled flag.
    input->setSuggestedValue(WebString());
    const bool is_initiating_node = (element == *input);
    input->setAutofilled(is_initiating_node && was_autofilled);

    // Clearing the suggested value on the focused node can lose the
    // selection, so put the text cursor back at the end of the value.
    if (is_initiating_node) {
      int length = input->value().length();
      input->setSelectionRange(length, length);
    }
  }

  return true;
}

// Returns true if any autofillable input element in |element|'s form is
// marked autofilled. Returns false if |element| has no owning form.
bool FormWithElementIsAutofilled(const WebInputElement& element) {
  WebFormElement owning_form = element.form();
  if (owning_form.isNull())
    return false;

  std::vector<WebFormControlElement> controls;
  ExtractAutofillableElements(owning_form, REQUIRE_AUTOCOMPLETE, &controls);

  for (size_t i = 0; i < controls.size(); ++i) {
    WebInputElement* input = toWebInputElement(&controls[i]);
    if (IsAutofillableInputElement(input) && input->isAutofilled())
      return true;
  }

  return false;
}

// Returns true if both the head and the body of |frame|'s document are empty
// according to IsWebElementEmpty().
bool IsWebpageEmpty(const WebKit::WebFrame* frame) {
  WebKit::WebDocument document = frame->document();

  if (!IsWebElementEmpty(document.head()))
    return false;
  return IsWebElementEmpty(document.body());
}

// Returns true if |element| is null, or if each of its direct children is
// either whitespace-only text, a node whose name starts with '#', or one of
// the tags listed in kAllowedValue.
bool IsWebElementEmpty(const WebKit::WebElement& element) {
  // Tags that may be present in a page that still counts as empty.
  const char* const kAllowedValue[] = {
    "script",
    "meta",
    "title",
  };
  const size_t kAllowedValueLength = arraysize(kAllowedValue);

  if (element.isNull())
    return true;

  // childNodes() is not a const method, so the const reference has to be
  // cast before it can be called.
  const WebKit::WebNodeList& child_nodes =
      const_cast<WebKit::WebElement&>(element).childNodes();
  for (size_t child_index = 0; child_index < child_nodes.length();
       ++child_index) {
    const WebKit::WebNode& child = child_nodes.item(child_index);

    // Text that is not purely whitespace means the element has content.
    if (child.isTextNode() &&
        !ContainsOnlyWhitespaceASCII(child.nodeValue().utf8()))
      return false;

    // Names beginning with '#' do not denote html tags; ignore those nodes.
    if (child.nodeName().utf8()[0] == '#')
      continue;

    // Scan kAllowedValue for the child's tag; reaching the end of the array
    // means the tag is not allowed in an empty page.
    size_t allowed_index = 0;
    while (allowed_index < kAllowedValueLength &&
           !HasTagName(child,
                       WebString::fromUTF8(kAllowedValue[allowed_index]))) {
      ++allowed_index;
    }
    if (allowed_index == kAllowedValueLength)
      return false;
  }
  return true;
}

}  // namespace autofill