form_autofill_util.cc revision d0247b1b59f9c528cb6df88b4f2b9afaf80d181e
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/content/renderer/form_autofill_util.h"
6
7#include <map>
8
9#include "base/command_line.h"
10#include "base/logging.h"
11#include "base/memory/scoped_vector.h"
12#include "base/metrics/field_trial.h"
13#include "base/strings/string_util.h"
14#include "base/strings/utf_string_conversions.h"
15#include "components/autofill/core/common/autofill_switches.h"
16#include "components/autofill/core/common/form_data.h"
17#include "components/autofill/core/common/form_field_data.h"
18#include "components/autofill/core/common/web_element_descriptor.h"
19#include "third_party/WebKit/public/platform/WebString.h"
20#include "third_party/WebKit/public/platform/WebVector.h"
21#include "third_party/WebKit/public/web/WebDocument.h"
22#include "third_party/WebKit/public/web/WebElement.h"
23#include "third_party/WebKit/public/web/WebExceptionCode.h"
24#include "third_party/WebKit/public/web/WebFormControlElement.h"
25#include "third_party/WebKit/public/web/WebFormElement.h"
26#include "third_party/WebKit/public/web/WebFrame.h"
27#include "third_party/WebKit/public/web/WebInputElement.h"
28#include "third_party/WebKit/public/web/WebLabelElement.h"
29#include "third_party/WebKit/public/web/WebNode.h"
30#include "third_party/WebKit/public/web/WebNodeList.h"
31#include "third_party/WebKit/public/web/WebOptionElement.h"
32#include "third_party/WebKit/public/web/WebSelectElement.h"
33
34using WebKit::WebDocument;
35using WebKit::WebElement;
36using WebKit::WebExceptionCode;
37using WebKit::WebFormControlElement;
38using WebKit::WebFormElement;
39using WebKit::WebFrame;
40using WebKit::WebInputElement;
41using WebKit::WebLabelElement;
42using WebKit::WebNode;
43using WebKit::WebNodeList;
44using WebKit::WebOptionElement;
45using WebKit::WebSelectElement;
46using WebKit::WebString;
47using WebKit::WebVector;
48
49namespace autofill {
50namespace {
51
// The maximum length (as reported by size()) allowed for form data extracted
// from a page.  Field values longer than this are truncated to this length,
// and autocomplete attribute values longer than this are replaced with a
// fixed placeholder string; see WebFormControlElementToFormField().
const size_t kMaxDataLength = 1024;
54
// Bit flags that tell the FillForm functions which kinds of fields must not
// be filled.  Each flag occupies its own bit so that flags can be OR-ed
// together.
enum FieldFilterMask {
  // Do not skip any field.
  FILTER_NONE = 0x0,
  // Skip fields that are not enabled.
  FILTER_DISABLED_ELEMENTS = 0x1,
  // Skip fields that are read-only.
  FILTER_READONLY_ELEMENTS = 0x2,
  // Skip fields that are not focusable.
  FILTER_NON_FOCUSABLE_ELEMENTS = 0x4,
  // Union of all of the flags above.
  FILTER_ALL_NON_EDITIABLE_ELEMENTS =
      FILTER_DISABLED_ELEMENTS |
      FILTER_READONLY_ELEMENTS |
      FILTER_NON_FOCUSABLE_ELEMENTS,
};
65
66bool IsOptionElement(const WebElement& element) {
67  CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option"));
68  return element.hasTagName(kOption);
69}
70
71bool IsScriptElement(const WebElement& element) {
72  CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script"));
73  return element.hasTagName(kScript);
74}
75
76bool IsNoScriptElement(const WebElement& element) {
77  CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript"));
78  return element.hasTagName(kNoScript);
79}
80
81bool HasTagName(const WebNode& node, const WebKit::WebString& tag) {
82  return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag);
83}
84
85bool IsAutofillableElement(const WebFormControlElement& element) {
86  const WebInputElement* input_element = toWebInputElement(&element);
87  return IsAutofillableInputElement(input_element) || IsSelectElement(element);
88}
89
90// Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement.
91bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) {
92  return input_element.autoComplete();
93}
94
95// Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
96// to a single space.  If |force_whitespace| is true, then the resulting string
97// is guaranteed to have a space between |prefix| and |suffix|.  Otherwise, the
98// result includes a space only if |prefix| has trailing whitespace or |suffix|
99// has leading whitespace.
100// A few examples:
101//  * CombineAndCollapseWhitespace("foo", "bar", false)       -> "foobar"
102//  * CombineAndCollapseWhitespace("foo", "bar", true)        -> "foo bar"
103//  * CombineAndCollapseWhitespace("foo ", "bar", false)      -> "foo bar"
104//  * CombineAndCollapseWhitespace("foo", " bar", false)      -> "foo bar"
105//  * CombineAndCollapseWhitespace("foo", " bar", true)       -> "foo bar"
106//  * CombineAndCollapseWhitespace("foo   ", "   bar", false) -> "foo bar"
107//  * CombineAndCollapseWhitespace(" foo", "bar ", false)     -> " foobar "
108//  * CombineAndCollapseWhitespace(" foo", "bar ", true)      -> " foo bar "
109const base::string16 CombineAndCollapseWhitespace(
110    const base::string16& prefix,
111    const base::string16& suffix,
112    bool force_whitespace) {
113  base::string16 prefix_trimmed;
114  TrimPositions prefix_trailing_whitespace =
115      TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed);
116
117  // Recursively compute the children's text.
118  base::string16 suffix_trimmed;
119  TrimPositions suffix_leading_whitespace =
120      TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed);
121
122  if (prefix_trailing_whitespace || suffix_leading_whitespace ||
123      force_whitespace) {
124    return prefix_trimmed + ASCIIToUTF16(" ") + suffix_trimmed;
125  } else {
126    return prefix_trimmed + suffix_trimmed;
127  }
128}
129
130// This is a helper function for the FindChildText() function (see below).
131// Search depth is limited with the |depth| parameter.
132base::string16 FindChildTextInner(const WebNode& node, int depth) {
133  if (depth <= 0 || node.isNull())
134    return base::string16();
135
136  // Skip over comments.
137  if (node.nodeType() == WebNode::CommentNode)
138    return FindChildTextInner(node.nextSibling(), depth - 1);
139
140  if (node.nodeType() != WebNode::ElementNode &&
141      node.nodeType() != WebNode::TextNode)
142    return base::string16();
143
144  // Ignore elements known not to contain inferable labels.
145  if (node.isElementNode()) {
146    const WebElement element = node.toConst<WebElement>();
147    if (IsOptionElement(element) ||
148        IsScriptElement(element) ||
149        IsNoScriptElement(element) ||
150        (element.isFormControlElement() &&
151         IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
152      return base::string16();
153    }
154  }
155
156  // Extract the text exactly at this node.
157  base::string16 node_text = node.nodeValue();
158
159  // Recursively compute the children's text.
160  // Preserve inter-element whitespace separation.
161  base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
162  bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
163  node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
164
165  // Recursively compute the siblings' text.
166  // Again, preserve inter-element whitespace separation.
167  base::string16 sibling_text =
168      FindChildTextInner(node.nextSibling(), depth - 1);
169  add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
170  node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
171
172  return node_text;
173}
174
175// Returns the aggregated values of the descendants of |element| that are
176// non-empty text nodes.  This is a faster alternative to |innerText()| for
177// performance critical operations.  It does a full depth-first search so can be
178// used when the structure is not directly known.  However, unlike with
179// |innerText()|, the search depth and breadth are limited to a fixed threshold.
180// Whitespace is trimmed from text accumulated at descendant nodes.
181base::string16 FindChildText(const WebNode& node) {
182  if (node.isTextNode())
183    return node.nodeValue();
184
185  WebNode child = node.firstChild();
186
187  const int kChildSearchDepth = 10;
188  base::string16 node_text = FindChildTextInner(child, kChildSearchDepth);
189  TrimWhitespace(node_text, TRIM_ALL, &node_text);
190  return node_text;
191}
192
193// Helper for |InferLabelForElement()| that infers a label, if possible, from
194// a previous sibling of |element|,
195// e.g. Some Text <input ...>
196// or   Some <span>Text</span> <input ...>
197// or   <p>Some Text</p><input ...>
198// or   <label>Some Text</label> <input ...>
199// or   Some Text <img><input ...>
200// or   <b>Some Text</b><br/> <input ...>.
201base::string16 InferLabelFromPrevious(const WebFormControlElement& element) {
202  base::string16 inferred_label;
203  WebNode previous = element;
204  while (true) {
205    previous = previous.previousSibling();
206    if (previous.isNull())
207      break;
208
209    // Skip over comments.
210    WebNode::NodeType node_type = previous.nodeType();
211    if (node_type == WebNode::CommentNode)
212      continue;
213
214    // Otherwise, only consider normal HTML elements and their contents.
215    if (node_type != WebNode::TextNode &&
216        node_type != WebNode::ElementNode)
217      break;
218
219    // A label might be split across multiple "lightweight" nodes.
220    // Coalesce any text contained in multiple consecutive
221    //  (a) plain text nodes or
222    //  (b) inline HTML elements that are essentially equivalent to text nodes.
223    CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b"));
224    CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong"));
225    CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span"));
226    CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font"));
227    if (previous.isTextNode() ||
228        HasTagName(previous, kBold) || HasTagName(previous, kStrong) ||
229        HasTagName(previous, kSpan) || HasTagName(previous, kFont)) {
230      base::string16 value = FindChildText(previous);
231      // A text node's value will be empty if it is for a line break.
232      bool add_space = previous.isTextNode() && value.empty();
233      inferred_label =
234          CombineAndCollapseWhitespace(value, inferred_label, add_space);
235      continue;
236    }
237
238    // If we have identified a partial label and have reached a non-lightweight
239    // element, consider the label to be complete.
240    base::string16 trimmed_label;
241    TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label);
242    if (!trimmed_label.empty())
243      break;
244
245    // <img> and <br> tags often appear between the input element and its
246    // label text, so skip over them.
247    CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img"));
248    CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br"));
249    if (HasTagName(previous, kImage) || HasTagName(previous, kBreak))
250      continue;
251
252    // We only expect <p> and <label> tags to contain the full label text.
253    CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p"));
254    CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
255    if (HasTagName(previous, kPage) || HasTagName(previous, kLabel))
256      inferred_label = FindChildText(previous);
257
258    break;
259  }
260
261  TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label);
262  return inferred_label;
263}
264
265// Helper for |InferLabelForElement()| that infers a label, if possible, from
266// enclosing list item,
267// e.g. <li>Some Text<input ...><input ...><input ...></tr>
268base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
269  WebNode parent = element.parentNode();
270  CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
271  while (!parent.isNull() && parent.isElementNode() &&
272         !parent.to<WebElement>().hasTagName(kListItem)) {
273    parent = parent.parentNode();
274  }
275
276  if (!parent.isNull() && HasTagName(parent, kListItem))
277    return FindChildText(parent);
278
279  return base::string16();
280}
281
282// Helper for |InferLabelForElement()| that infers a label, if possible, from
283// surrounding table structure,
284// e.g. <tr><td>Some Text</td><td><input ...></td></tr>
285// or   <tr><th>Some Text</th><td><input ...></td></tr>
286// or   <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
287// or   <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
288base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
289  CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
290  WebNode parent = element.parentNode();
291  while (!parent.isNull() && parent.isElementNode() &&
292         !parent.to<WebElement>().hasTagName(kTableCell)) {
293    parent = parent.parentNode();
294  }
295
296  if (parent.isNull())
297    return base::string16();
298
299  // Check all previous siblings, skipping non-element nodes, until we find a
300  // non-empty text block.
301  base::string16 inferred_label;
302  WebNode previous = parent.previousSibling();
303  CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
304  while (inferred_label.empty() && !previous.isNull()) {
305    if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader))
306      inferred_label = FindChildText(previous);
307
308    previous = previous.previousSibling();
309  }
310
311  return inferred_label;
312}
313
314// Helper for |InferLabelForElement()| that infers a label, if possible, from
315// surrounding table structure,
316// e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
317base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
318  CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
319  WebNode parent = element.parentNode();
320  while (!parent.isNull() && parent.isElementNode() &&
321         !parent.to<WebElement>().hasTagName(kTableRow)) {
322    parent = parent.parentNode();
323  }
324
325  if (parent.isNull())
326    return base::string16();
327
328  // Check all previous siblings, skipping non-element nodes, until we find a
329  // non-empty text block.
330  base::string16 inferred_label;
331  WebNode previous = parent.previousSibling();
332  while (inferred_label.empty() && !previous.isNull()) {
333    if (HasTagName(previous, kTableRow))
334      inferred_label = FindChildText(previous);
335
336    previous = previous.previousSibling();
337  }
338
339  return inferred_label;
340}
341
342// Helper for |InferLabelForElement()| that infers a label, if possible, from
343// a surrounding div table,
344// e.g. <div>Some Text<span><input ...></span></div>
345// e.g. <div>Some Text</div><div><input ...></div>
346base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
347  WebNode node = element.parentNode();
348  bool looking_for_parent = true;
349
350  // Search the sibling and parent <div>s until we find a candidate label.
351  base::string16 inferred_label;
352  CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
353  CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table"));
354  CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
355  while (inferred_label.empty() && !node.isNull()) {
356    if (HasTagName(node, kDiv)) {
357      looking_for_parent = false;
358      inferred_label = FindChildText(node);
359    } else if (looking_for_parent &&
360               (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) {
361      // If the element is in a table or fieldset, its label most likely is too.
362      break;
363    }
364
365    if (node.previousSibling().isNull()) {
366      // If there are no more siblings, continue walking up the tree.
367      looking_for_parent = true;
368    }
369
370    if (looking_for_parent)
371      node = node.parentNode();
372    else
373      node = node.previousSibling();
374  }
375
376  return inferred_label;
377}
378
379// Helper for |InferLabelForElement()| that infers a label, if possible, from
380// a surrounding definition list,
381// e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
382// e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
383base::string16 InferLabelFromDefinitionList(
384    const WebFormControlElement& element) {
385  CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
386  WebNode parent = element.parentNode();
387  while (!parent.isNull() && parent.isElementNode() &&
388         !parent.to<WebElement>().hasTagName(kDefinitionData))
389    parent = parent.parentNode();
390
391  if (parent.isNull() || !HasTagName(parent, kDefinitionData))
392    return base::string16();
393
394  // Skip by any intervening text nodes.
395  WebNode previous = parent.previousSibling();
396  while (!previous.isNull() && previous.isTextNode())
397    previous = previous.previousSibling();
398
399  CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
400  if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
401    return base::string16();
402
403  return FindChildText(previous);
404}
405
406// Infers corresponding label for |element| from surrounding context in the DOM,
407// e.g. the contents of the preceding <p> tag or text element.
408base::string16 InferLabelForElement(const WebFormControlElement& element) {
409  base::string16 inferred_label = InferLabelFromPrevious(element);
410  if (!inferred_label.empty())
411    return inferred_label;
412
413  // If we didn't find a label, check for list item case.
414  inferred_label = InferLabelFromListItem(element);
415  if (!inferred_label.empty())
416    return inferred_label;
417
418  // If we didn't find a label, check for table cell case.
419  inferred_label = InferLabelFromTableColumn(element);
420  if (!inferred_label.empty())
421    return inferred_label;
422
423  // If we didn't find a label, check for table row case.
424  inferred_label = InferLabelFromTableRow(element);
425  if (!inferred_label.empty())
426    return inferred_label;
427
428  // If we didn't find a label, check for definition list case.
429  inferred_label = InferLabelFromDefinitionList(element);
430  if (!inferred_label.empty())
431    return inferred_label;
432
433  // If we didn't find a label, check for div table case.
434  return InferLabelFromDivTable(element);
435}
436
437// Fills |option_strings| with the values of the <option> elements present in
438// |select_element|.
439void GetOptionStringsFromElement(const WebSelectElement& select_element,
440                                 std::vector<base::string16>* option_values,
441                                 std::vector<base::string16>* option_contents) {
442  DCHECK(!select_element.isNull());
443
444  option_values->clear();
445  option_contents->clear();
446  WebVector<WebElement> list_items = select_element.listItems();
447  option_values->reserve(list_items.size());
448  option_contents->reserve(list_items.size());
449  for (size_t i = 0; i < list_items.size(); ++i) {
450    if (IsOptionElement(list_items[i])) {
451      const WebOptionElement option = list_items[i].toConst<WebOptionElement>();
452      option_values->push_back(option.value());
453      option_contents->push_back(option.text());
454    }
455  }
456}
457
458// The callback type used by |ForEachMatchingFormField()|.
459typedef void (*Callback)(const FormFieldData&,
460                         bool, /* is_initiating_element */
461                         WebKit::WebFormControlElement*);
462
463// For each autofillable field in |data| that matches a field in the |form|,
464// the |callback| is invoked with the corresponding |form| field data.
465void ForEachMatchingFormField(const WebFormElement& form_element,
466                              const WebElement& initiating_element,
467                              const FormData& data,
468                              FieldFilterMask filters,
469                              bool force_override,
470                              Callback callback) {
471  std::vector<WebFormControlElement> control_elements;
472  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
473                              &control_elements);
474
475  if (control_elements.size() != data.fields.size()) {
476    // This case should be reachable only for pathological websites and tests,
477    // which add or remove form fields while the user is interacting with the
478    // Autofill popup.
479    return;
480  }
481
482  // It's possible that the site has injected fields into the form after the
483  // page has loaded, so we can't assert that the size of the cached control
484  // elements is equal to the size of the fields in |form|.  Fortunately, the
485  // one case in the wild where this happens, paypal.com signup form, the fields
486  // are appended to the end of the form and are not visible.
487  for (size_t i = 0; i < control_elements.size(); ++i) {
488    WebFormControlElement* element = &control_elements[i];
489
490    if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
491      // This case should be reachable only for pathological websites, which
492      // rename form fields while the user is interacting with the Autofill
493      // popup.  I (isherman) am not aware of any such websites, and so am
494      // optimistically including a NOTREACHED().  If you ever trip this check,
495      // please file a bug against me.
496      NOTREACHED();
497      continue;
498    }
499
500    bool is_initiating_element = (*element == initiating_element);
501
502    // Only autofill empty fields and the field that initiated the filling,
503    // i.e. the field the user is currently editing and interacting with.
504    const WebInputElement* input_element = toWebInputElement(element);
505    if (!force_override && IsTextInput(input_element) &&
506        !is_initiating_element && !input_element->value().isEmpty())
507      continue;
508
509    if (((filters & FILTER_DISABLED_ELEMENTS) && !element->isEnabled()) ||
510        ((filters & FILTER_READONLY_ELEMENTS) && element->isReadOnly()) ||
511        ((filters & FILTER_NON_FOCUSABLE_ELEMENTS) && !element->isFocusable()))
512      continue;
513
514    callback(data.fields[i], is_initiating_element, element);
515  }
516}
517
518// Sets the |field|'s value to the value in |data|.
519// Also sets the "autofilled" attribute, causing the background to be yellow.
520void FillFormField(const FormFieldData& data,
521                   bool is_initiating_node,
522                   WebKit::WebFormControlElement* field) {
523  // Nothing to fill.
524  if (data.value.empty())
525    return;
526
527  WebInputElement* input_element = toWebInputElement(field);
528  if (IsTextInput(input_element)) {
529    // If the maxlength attribute contains a negative value, maxLength()
530    // returns the default maxlength value.
531    input_element->setValue(
532        data.value.substr(0, input_element->maxLength()), true);
533    input_element->setAutofilled(true);
534    if (is_initiating_node) {
535      int length = input_element->value().length();
536      input_element->setSelectionRange(length, length);
537      // Clear the current IME composition (the underline), if there is one.
538      input_element->document().frame()->unmarkText();
539    }
540  } else if (IsSelectElement(*field)) {
541    WebSelectElement select_element = field->to<WebSelectElement>();
542    if (select_element.value() != data.value) {
543      select_element.setValue(data.value);
544      select_element.dispatchFormControlChangeEvent();
545    }
546  } else {
547    DCHECK(IsCheckableElement(input_element));
548    input_element->setChecked(data.is_checked, true);
549  }
550}
551
552// Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
553// Also sets the "autofilled" attribute, causing the background to be yellow.
554void PreviewFormField(const FormFieldData& data,
555                      bool is_initiating_node,
556                      WebKit::WebFormControlElement* field) {
557  // Nothing to preview.
558  if (data.value.empty())
559    return;
560
561  // Only preview input fields. Excludes checkboxes and radio buttons, as there
562  // is no provision for setSuggestedCheckedValue in WebInputElement.
563  WebInputElement* input_element = toWebInputElement(field);
564  if (!IsTextInput(input_element))
565    return;
566
567  // If the maxlength attribute contains a negative value, maxLength()
568  // returns the default maxlength value.
569  input_element->setSuggestedValue(
570      data.value.substr(0, input_element->maxLength()));
571  input_element->setAutofilled(true);
572  if (is_initiating_node) {
573    // Select the part of the text that the user didn't type.
574    input_element->setSelectionRange(input_element->value().length(),
575                                     input_element->suggestedValue().length());
576  }
577}
578
579std::string RetrievalMethodToString(
580    const WebElementDescriptor::RetrievalMethod& method) {
581  switch (method) {
582    case WebElementDescriptor::CSS_SELECTOR:
583      return "CSS_SELECTOR";
584    case WebElementDescriptor::ID:
585      return "ID";
586    case WebElementDescriptor::NONE:
587      return "NONE";
588  }
589  NOTREACHED();
590  return "UNKNOWN";
591}
592
593// Recursively checks whether |node| or any of its children have a non-empty
594// bounding box. The recursion depth is bounded by |depth|.
595bool IsWebNodeVisibleImpl(const WebKit::WebNode& node, const int depth) {
596  if (depth < 0)
597    return false;
598  if (node.hasNonEmptyBoundingBox())
599    return true;
600
601  // The childNodes method is not a const method. Therefore it cannot be called
602  // on a const reference. Therefore we need a const cast.
603  const WebKit::WebNodeList& children =
604      const_cast<WebKit::WebNode&>(node).childNodes();
605  size_t length = children.length();
606  for (size_t i = 0; i < length; ++i) {
607    const WebKit::WebNode& item = children.item(i);
608    if (IsWebNodeVisibleImpl(item, depth - 1))
609      return true;
610  }
611  return false;
612}
613
614}  // namespace
615
// NOTE(review): not referenced in the visible part of this file; presumably
// declared in form_autofill_util.h as an upper bound on the number of form
// fields that will be parsed -- confirm against the header.
const size_t kMaxParseableFields = 200;
617
618// All text fields, including password fields, should be extracted.
619bool IsTextInput(const WebInputElement* element) {
620  return element && element->isTextField();
621}
622
623bool IsSelectElement(const WebFormControlElement& element) {
624  // Is static for improving performance.
625  CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
626  return element.formControlType() == kSelectOne;
627}
628
629bool IsCheckableElement(const WebInputElement* element) {
630  if (!element)
631    return false;
632
633  return element->isCheckbox() || element->isRadioButton();
634}
635
636bool IsAutofillableInputElement(const WebInputElement* element) {
637  return IsTextInput(element) || IsCheckableElement(element);
638}
639
640const base::string16 GetFormIdentifier(const WebFormElement& form) {
641  base::string16 identifier = form.name();
642  CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
643  if (identifier.empty())
644    identifier = form.getAttribute(kId);
645
646  return identifier;
647}
648
649bool IsWebNodeVisible(const WebKit::WebNode& node) {
650  // In the bug http://crbug.com/237216 the form's bounding box is empty
651  // however the form has non empty children. Thus we need to look at the
652  // form's children.
653  int kNodeSearchDepth = 2;
654  return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
655}
656
657bool ClickElement(const WebDocument& document,
658                  const WebElementDescriptor& element_descriptor) {
659  WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor);
660  WebKit::WebElement element;
661
662  switch (element_descriptor.retrieval_method) {
663    case WebElementDescriptor::CSS_SELECTOR: {
664      WebExceptionCode ec = 0;
665      element = document.querySelector(web_descriptor, ec);
666      if (ec)
667        DVLOG(1) << "Query selector failed. Error code: " << ec << ".";
668      break;
669    }
670    case WebElementDescriptor::ID:
671      element = document.getElementById(web_descriptor);
672      break;
673    case WebElementDescriptor::NONE:
674      return true;
675  }
676
677  if (element.isNull()) {
678    DVLOG(1) << "Could not find "
679             << element_descriptor.descriptor
680             << " by "
681             << RetrievalMethodToString(element_descriptor.retrieval_method)
682             << ".";
683    return false;
684  }
685
686  element.simulateClick();
687  return true;
688}
689
690// Fills |autofillable_elements| with all the auto-fillable form control
691// elements in |form_element|.
692void ExtractAutofillableElements(
693    const WebFormElement& form_element,
694    RequirementsMask requirements,
695    std::vector<WebFormControlElement>* autofillable_elements) {
696  WebVector<WebFormControlElement> control_elements;
697  form_element.getFormControlElements(control_elements);
698
699  autofillable_elements->clear();
700  for (size_t i = 0; i < control_elements.size(); ++i) {
701    WebFormControlElement element = control_elements[i];
702    if (!IsAutofillableElement(element))
703      continue;
704
705    if (requirements & REQUIRE_AUTOCOMPLETE) {
706      // TODO(jhawkins): WebKit currently doesn't handle the autocomplete
707      // attribute for select control elements, but it probably should.
708      WebInputElement* input_element = toWebInputElement(&control_elements[i]);
709      if (IsAutofillableInputElement(input_element) &&
710          !SatisfiesRequireAutocomplete(*input_element))
711        continue;
712    }
713
714    autofillable_elements->push_back(element);
715  }
716}
717
718void WebFormControlElementToFormField(const WebFormControlElement& element,
719                                      ExtractMask extract_mask,
720                                      FormFieldData* field) {
721  DCHECK(field);
722  DCHECK(!element.isNull());
723  CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));
724
725  // The label is not officially part of a WebFormControlElement; however, the
726  // labels for all form control elements are scraped from the DOM and set in
727  // WebFormElementToFormData.
728  field->name = element.nameForAutofill();
729  field->form_control_type = UTF16ToUTF8(element.formControlType());
730  field->autocomplete_attribute =
731      UTF16ToUTF8(element.getAttribute(kAutocomplete));
732  if (field->autocomplete_attribute.size() > kMaxDataLength) {
733    // Discard overly long attribute values to avoid DOS-ing the browser
734    // process.  However, send over a default string to indicate that the
735    // attribute was present.
736    field->autocomplete_attribute = "x-max-data-length-exceeded";
737  }
738
739  if (!IsAutofillableElement(element))
740    return;
741
742  const WebInputElement* input_element = toWebInputElement(&element);
743  if (IsAutofillableInputElement(input_element)) {
744    if (IsTextInput(input_element))
745      field->max_length = input_element->maxLength();
746
747    field->is_autofilled = input_element->isAutofilled();
748    field->is_focusable = input_element->isFocusable();
749    field->is_checkable = IsCheckableElement(input_element);
750    field->is_checked = input_element->isChecked();
751    field->should_autocomplete = input_element->autoComplete();
752    field->text_direction = input_element->directionForFormData() == "rtl" ?
753        base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
754  } else if (extract_mask & EXTRACT_OPTIONS) {
755    // Set option strings on the field if available.
756    DCHECK(IsSelectElement(element));
757    const WebSelectElement select_element = element.toConst<WebSelectElement>();
758    GetOptionStringsFromElement(select_element,
759                                &field->option_values,
760                                &field->option_contents);
761  }
762
763  if (!(extract_mask & EXTRACT_VALUE))
764    return;
765
766  base::string16 value;
767  if (IsAutofillableInputElement(input_element)) {
768    value = input_element->value();
769  } else {
770    DCHECK(IsSelectElement(element));
771    const WebSelectElement select_element = element.toConst<WebSelectElement>();
772    value = select_element.value();
773
774    // Convert the |select_element| value to text if requested.
775    if (extract_mask & EXTRACT_OPTION_TEXT) {
776      WebVector<WebElement> list_items = select_element.listItems();
777      for (size_t i = 0; i < list_items.size(); ++i) {
778        if (IsOptionElement(list_items[i])) {
779          const WebOptionElement option_element =
780              list_items[i].toConst<WebOptionElement>();
781          if (option_element.value() == value) {
782            value = option_element.text();
783            break;
784          }
785        }
786      }
787    }
788  }
789
790  // Constrain the maximum data length to prevent a malicious site from DOS'ing
791  // the browser: http://crbug.com/49332
792  if (value.size() > kMaxDataLength)
793    value = value.substr(0, kMaxDataLength);
794
795  field->value = value;
796}
797
798bool WebFormElementToFormData(
799    const WebKit::WebFormElement& form_element,
800    const WebKit::WebFormControlElement& form_control_element,
801    RequirementsMask requirements,
802    ExtractMask extract_mask,
803    FormData* form,
804    FormFieldData* field) {
805  CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
806  CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
807  CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
808
809  const WebFrame* frame = form_element.document().frame();
810  if (!frame)
811    return false;
812
813  if (requirements & REQUIRE_AUTOCOMPLETE && !form_element.autoComplete())
814    return false;
815
816  form->name = GetFormIdentifier(form_element);
817  form->method = form_element.method();
818  form->origin = frame->document().url();
819  form->action = frame->document().completeURL(form_element.action());
820  form->user_submitted = form_element.wasUserSubmitted();
821
822  // If the completed URL is not valid, just use the action we get from
823  // WebKit.
824  if (!form->action.is_valid())
825    form->action = GURL(form_element.action());
826
827  // A map from a FormFieldData's name to the FormFieldData itself.
828  std::map<base::string16, FormFieldData*> name_map;
829
830  // The extracted FormFields.  We use pointers so we can store them in
831  // |name_map|.
832  ScopedVector<FormFieldData> form_fields;
833
834  WebVector<WebFormControlElement> control_elements;
835  form_element.getFormControlElements(control_elements);
836
837  // A vector of bools that indicate whether each field in the form meets the
838  // requirements and thus will be in the resulting |form|.
839  std::vector<bool> fields_extracted(control_elements.size(), false);
840
841  for (size_t i = 0; i < control_elements.size(); ++i) {
842    const WebFormControlElement& control_element = control_elements[i];
843
844    if (!IsAutofillableElement(control_element))
845      continue;
846
847    const WebInputElement* input_element = toWebInputElement(&control_element);
848    if (requirements & REQUIRE_AUTOCOMPLETE &&
849        IsAutofillableInputElement(input_element) &&
850        !SatisfiesRequireAutocomplete(*input_element))
851      continue;
852
853    // Create a new FormFieldData, fill it out and map it to the field's name.
854    FormFieldData* form_field = new FormFieldData;
855    WebFormControlElementToFormField(control_element, extract_mask, form_field);
856    form_fields.push_back(form_field);
857    // TODO(jhawkins): A label element is mapped to a form control element's id.
858    // field->name() will contain the id only if the name does not exist.  Add
859    // an id() method to WebFormControlElement and use that here.
860    name_map[form_field->name] = form_field;
861    fields_extracted[i] = true;
862  }
863
864  // If we failed to extract any fields, give up.  Also, to avoid overly
865  // expensive computation, we impose a maximum number of allowable fields.
866  if (form_fields.empty() || form_fields.size() > kMaxParseableFields)
867    return false;
868
869  // Loop through the label elements inside the form element.  For each label
870  // element, get the corresponding form control element, use the form control
871  // element's name as a key into the <name, FormFieldData> map to find the
872  // previously created FormFieldData and set the FormFieldData's label to the
873  // label.firstChild().nodeValue() of the label element.
874  WebNodeList labels = form_element.getElementsByTagName(kLabel);
875  for (unsigned i = 0; i < labels.length(); ++i) {
876    WebLabelElement label = labels.item(i).to<WebLabelElement>();
877    WebFormControlElement field_element =
878        label.correspondingControl().to<WebFormControlElement>();
879
880    base::string16 element_name;
881    if (field_element.isNull()) {
882      // Sometimes site authors will incorrectly specify the corresponding
883      // field element's name rather than its id, so we compensate here.
884      element_name = label.getAttribute(kFor);
885    } else if (
886        !field_element.isFormControlElement() ||
887        field_element.formControlType() == kHidden) {
888      continue;
889    } else {
890      element_name = field_element.nameForAutofill();
891    }
892
893    std::map<base::string16, FormFieldData*>::iterator iter =
894        name_map.find(element_name);
895    if (iter != name_map.end()) {
896      base::string16 label_text = FindChildText(label);
897
898      // Concatenate labels because some sites might have multiple label
899      // candidates.
900      if (!iter->second->label.empty() && !label_text.empty())
901        iter->second->label += ASCIIToUTF16(" ");
902      iter->second->label += label_text;
903    }
904  }
905
906  // Loop through the form control elements, extracting the label text from
907  // the DOM.  We use the |fields_extracted| vector to make sure we assign the
908  // extracted label to the correct field, as it's possible |form_fields| will
909  // not contain all of the elements in |control_elements|.
910  for (size_t i = 0, field_idx = 0;
911       i < control_elements.size() && field_idx < form_fields.size(); ++i) {
912    // This field didn't meet the requirements, so don't try to find a label
913    // for it.
914    if (!fields_extracted[i])
915      continue;
916
917    const WebFormControlElement& control_element = control_elements[i];
918    if (form_fields[field_idx]->label.empty())
919      form_fields[field_idx]->label = InferLabelForElement(control_element);
920
921    if (field && form_control_element == control_element)
922      *field = *form_fields[field_idx];
923
924    ++field_idx;
925  }
926
927  // Copy the created FormFields into the resulting FormData object.
928  for (ScopedVector<FormFieldData>::const_iterator iter = form_fields.begin();
929       iter != form_fields.end(); ++iter) {
930    form->fields.push_back(**iter);
931  }
932
933  return true;
934}
935
936bool FindFormAndFieldForInputElement(const WebInputElement& element,
937                                     FormData* form,
938                                     FormFieldData* field,
939                                     RequirementsMask requirements) {
940  if (!IsAutofillableElement(element))
941    return false;
942
943  const WebFormElement form_element = element.form();
944  if (form_element.isNull())
945    return false;
946
947  ExtractMask extract_mask =
948      static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
949  return WebFormElementToFormData(form_element,
950                                  element,
951                                  requirements,
952                                  extract_mask,
953                                  form,
954                                  field);
955}
956
957void FillForm(const FormData& form, const WebInputElement& element) {
958  WebFormElement form_element = element.form();
959  if (form_element.isNull())
960    return;
961
962  ForEachMatchingFormField(form_element,
963                           element,
964                           form,
965                           FILTER_ALL_NON_EDITIABLE_ELEMENTS,
966                           false, /* dont force override */
967                           &FillFormField);
968}
969
970void FillFormIncludingNonFocusableElements(const FormData& form_data,
971                                           const WebFormElement& form_element) {
972  if (form_element.isNull())
973    return;
974
975  FieldFilterMask filter_mask = static_cast<FieldFilterMask>(
976      FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS);
977  ForEachMatchingFormField(form_element,
978                           WebInputElement(),
979                           form_data,
980                           filter_mask,
981                           true, /* force override */
982                           &FillFormField);
983}
984
985void FillFormForAllElements(const FormData& form_data,
986                            const WebFormElement& form_element) {
987  if (form_element.isNull())
988    return;
989
990  ForEachMatchingFormField(form_element,
991                           WebInputElement(),
992                           form_data,
993                           FILTER_NONE,
994                           true, /* force override */
995                           &FillFormField);
996}
997
998void PreviewForm(const FormData& form, const WebInputElement& element) {
999  WebFormElement form_element = element.form();
1000  if (form_element.isNull())
1001    return;
1002
1003  ForEachMatchingFormField(form_element,
1004                           element,
1005                           form,
1006                           FILTER_ALL_NON_EDITIABLE_ELEMENTS,
1007                           false, /* dont force override */
1008                           &PreviewFormField);
1009}
1010
1011bool ClearPreviewedFormWithElement(const WebInputElement& element,
1012                                   bool was_autofilled) {
1013  WebFormElement form_element = element.form();
1014  if (form_element.isNull())
1015    return false;
1016
1017  std::vector<WebFormControlElement> control_elements;
1018  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1019                              &control_elements);
1020  for (size_t i = 0; i < control_elements.size(); ++i) {
1021    // Only text input elements can be previewed.
1022    WebInputElement* input_element = toWebInputElement(&control_elements[i]);
1023    if (!IsTextInput(input_element))
1024      continue;
1025
1026    // If the input element is not auto-filled, we did not preview it, so there
1027    // is nothing to reset.
1028    if (!input_element->isAutofilled())
1029      continue;
1030
1031    // There might be unrelated elements in this form which have already been
1032    // auto-filled.  For example, the user might have already filled the address
1033    // part of a form and now be dealing with the credit card section.  We only
1034    // want to reset the auto-filled status for fields that were previewed.
1035    if (input_element->suggestedValue().isEmpty())
1036      continue;
1037
1038    // Clear the suggested value. For the initiating node, also restore the
1039    // original value.
1040    input_element->setSuggestedValue(WebString());
1041    bool is_initiating_node = (element == *input_element);
1042    if (is_initiating_node)
1043      input_element->setAutofilled(was_autofilled);
1044    else
1045      input_element->setAutofilled(false);
1046
1047    // Clearing the suggested value in the focused node (above) can cause
1048    // selection to be lost. We force selection range to restore the text
1049    // cursor.
1050    if (is_initiating_node) {
1051      int length = input_element->value().length();
1052      input_element->setSelectionRange(length, length);
1053    }
1054  }
1055
1056  return true;
1057}
1058
1059bool FormWithElementIsAutofilled(const WebInputElement& element) {
1060  WebFormElement form_element = element.form();
1061  if (form_element.isNull())
1062    return false;
1063
1064  std::vector<WebFormControlElement> control_elements;
1065  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1066                              &control_elements);
1067  for (size_t i = 0; i < control_elements.size(); ++i) {
1068    WebInputElement* input_element = toWebInputElement(&control_elements[i]);
1069    if (!IsAutofillableInputElement(input_element))
1070      continue;
1071
1072    if (input_element->isAutofilled())
1073      return true;
1074  }
1075
1076  return false;
1077}
1078
1079bool IsWebpageEmpty(const WebKit::WebFrame* frame) {
1080  WebKit::WebDocument document = frame->document();
1081
1082  return IsWebElementEmpty(document.head()) &&
1083         IsWebElementEmpty(document.body());
1084}
1085
1086bool IsWebElementEmpty(const WebKit::WebElement& element) {
1087  // This array contains all tags which can be present in an empty page.
1088  const char* const kAllowedValue[] = {
1089    "script",
1090    "meta",
1091    "title",
1092  };
1093  const size_t kAllowedValueLength = arraysize(kAllowedValue);
1094
1095  if (element.isNull())
1096    return true;
1097  // The childNodes method is not a const method. Therefore it cannot be called
1098  // on a const reference. Therefore we need a const cast.
1099  const WebKit::WebNodeList& children =
1100      const_cast<WebKit::WebElement&>(element).childNodes();
1101  for (size_t i = 0; i < children.length(); ++i) {
1102    const WebKit::WebNode& item = children.item(i);
1103
1104    if (item.isTextNode() &&
1105        !ContainsOnlyWhitespaceASCII(item.nodeValue().utf8()))
1106      return false;
1107
1108    // We ignore all other items with names which begin with
1109    // the character # because they are not html tags.
1110    if (item.nodeName().utf8()[0] == '#')
1111      continue;
1112
1113    bool tag_is_allowed = false;
1114    // Test if the item name is in the kAllowedValue array
1115    for (size_t allowed_value_index = 0;
1116         allowed_value_index < kAllowedValueLength; ++allowed_value_index) {
1117      if (HasTagName(item,
1118                     WebString::fromUTF8(kAllowedValue[allowed_value_index]))) {
1119        tag_is_allowed = true;
1120        break;
1121      }
1122    }
1123    if (!tag_is_allowed)
1124      return false;
1125  }
1126  return true;
1127}
1128
1129}  // namespace autofill
1130