form_autofill_util.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/content/renderer/form_autofill_util.h"
6
7#include <map>
8
9#include "base/command_line.h"
10#include "base/logging.h"
11#include "base/memory/scoped_vector.h"
12#include "base/metrics/field_trial.h"
13#include "base/strings/string_util.h"
14#include "base/strings/utf_string_conversions.h"
15#include "components/autofill/core/common/autofill_data_validation.h"
16#include "components/autofill/core/common/autofill_switches.h"
17#include "components/autofill/core/common/form_data.h"
18#include "components/autofill/core/common/form_field_data.h"
19#include "components/autofill/core/common/web_element_descriptor.h"
20#include "third_party/WebKit/public/platform/WebString.h"
21#include "third_party/WebKit/public/platform/WebVector.h"
22#include "third_party/WebKit/public/web/WebDocument.h"
23#include "third_party/WebKit/public/web/WebElement.h"
24#include "third_party/WebKit/public/web/WebElementCollection.h"
25#include "third_party/WebKit/public/web/WebExceptionCode.h"
26#include "third_party/WebKit/public/web/WebFormControlElement.h"
27#include "third_party/WebKit/public/web/WebFormElement.h"
28#include "third_party/WebKit/public/web/WebFrame.h"
29#include "third_party/WebKit/public/web/WebInputElement.h"
30#include "third_party/WebKit/public/web/WebLabelElement.h"
31#include "third_party/WebKit/public/web/WebNode.h"
32#include "third_party/WebKit/public/web/WebNodeList.h"
33#include "third_party/WebKit/public/web/WebOptionElement.h"
34#include "third_party/WebKit/public/web/WebSelectElement.h"
35#include "third_party/WebKit/public/web/WebTextAreaElement.h"
36
37using blink::WebDocument;
38using blink::WebElement;
39using blink::WebElementCollection;
40using blink::WebExceptionCode;
41using blink::WebFormControlElement;
42using blink::WebFormElement;
43using blink::WebFrame;
44using blink::WebInputElement;
45using blink::WebLabelElement;
46using blink::WebNode;
47using blink::WebNodeList;
48using blink::WebOptionElement;
49using blink::WebSelectElement;
50using blink::WebTextAreaElement;
51using blink::WebString;
52using blink::WebVector;
53
54namespace autofill {
55namespace {
56
// Bit flags that tell the FillForm functions which kinds of fields must be
// left untouched.  Individual flags can be OR-ed together.
enum FieldFilterMask {
  FILTER_NONE = 0x0,
  FILTER_DISABLED_ELEMENTS = 0x1,       // Bit 0.
  FILTER_READONLY_ELEMENTS = 0x2,       // Bit 1.
  FILTER_NON_FOCUSABLE_ELEMENTS = 0x4,  // Bit 2.
  // Union of all of the individual filters above.
  FILTER_ALL_NON_EDITIABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS |
                                      FILTER_READONLY_ELEMENTS |
                                      FILTER_NON_FOCUSABLE_ELEMENTS,
};
67
68bool IsOptionElement(const WebElement& element) {
69  CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option"));
70  return element.hasTagName(kOption);
71}
72
73bool IsScriptElement(const WebElement& element) {
74  CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script"));
75  return element.hasTagName(kScript);
76}
77
78bool IsNoScriptElement(const WebElement& element) {
79  CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript"));
80  return element.hasTagName(kNoScript);
81}
82
83bool HasTagName(const WebNode& node, const blink::WebString& tag) {
84  return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag);
85}
86
87bool IsAutofillableElement(const WebFormControlElement& element) {
88  const WebInputElement* input_element = toWebInputElement(&element);
89  return IsAutofillableInputElement(input_element) ||
90         IsSelectElement(element) ||
91         IsTextAreaElement(element);
92}
93
94// Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement.
95bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) {
96  return input_element.autoComplete();
97}
98
99// Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
100// to a single space.  If |force_whitespace| is true, then the resulting string
101// is guaranteed to have a space between |prefix| and |suffix|.  Otherwise, the
102// result includes a space only if |prefix| has trailing whitespace or |suffix|
103// has leading whitespace.
104// A few examples:
105//  * CombineAndCollapseWhitespace("foo", "bar", false)       -> "foobar"
106//  * CombineAndCollapseWhitespace("foo", "bar", true)        -> "foo bar"
107//  * CombineAndCollapseWhitespace("foo ", "bar", false)      -> "foo bar"
108//  * CombineAndCollapseWhitespace("foo", " bar", false)      -> "foo bar"
109//  * CombineAndCollapseWhitespace("foo", " bar", true)       -> "foo bar"
110//  * CombineAndCollapseWhitespace("foo   ", "   bar", false) -> "foo bar"
111//  * CombineAndCollapseWhitespace(" foo", "bar ", false)     -> " foobar "
112//  * CombineAndCollapseWhitespace(" foo", "bar ", true)      -> " foo bar "
113const base::string16 CombineAndCollapseWhitespace(
114    const base::string16& prefix,
115    const base::string16& suffix,
116    bool force_whitespace) {
117  base::string16 prefix_trimmed;
118  TrimPositions prefix_trailing_whitespace =
119      TrimWhitespace(prefix, TRIM_TRAILING, &prefix_trimmed);
120
121  // Recursively compute the children's text.
122  base::string16 suffix_trimmed;
123  TrimPositions suffix_leading_whitespace =
124      TrimWhitespace(suffix, TRIM_LEADING, &suffix_trimmed);
125
126  if (prefix_trailing_whitespace || suffix_leading_whitespace ||
127      force_whitespace) {
128    return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed;
129  } else {
130    return prefix_trimmed + suffix_trimmed;
131  }
132}
133
134// This is a helper function for the FindChildText() function (see below).
135// Search depth is limited with the |depth| parameter.
136base::string16 FindChildTextInner(const WebNode& node, int depth) {
137  if (depth <= 0 || node.isNull())
138    return base::string16();
139
140  // Skip over comments.
141  if (node.nodeType() == WebNode::CommentNode)
142    return FindChildTextInner(node.nextSibling(), depth - 1);
143
144  if (node.nodeType() != WebNode::ElementNode &&
145      node.nodeType() != WebNode::TextNode)
146    return base::string16();
147
148  // Ignore elements known not to contain inferable labels.
149  if (node.isElementNode()) {
150    const WebElement element = node.toConst<WebElement>();
151    if (IsOptionElement(element) ||
152        IsScriptElement(element) ||
153        IsNoScriptElement(element) ||
154        (element.isFormControlElement() &&
155         IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
156      return base::string16();
157    }
158  }
159
160  // Extract the text exactly at this node.
161  base::string16 node_text = node.nodeValue();
162
163  // Recursively compute the children's text.
164  // Preserve inter-element whitespace separation.
165  base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
166  bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
167  node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
168
169  // Recursively compute the siblings' text.
170  // Again, preserve inter-element whitespace separation.
171  base::string16 sibling_text =
172      FindChildTextInner(node.nextSibling(), depth - 1);
173  add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
174  node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
175
176  return node_text;
177}
178
179// Returns the aggregated values of the descendants of |element| that are
180// non-empty text nodes.  This is a faster alternative to |innerText()| for
181// performance critical operations.  It does a full depth-first search so can be
182// used when the structure is not directly known.  However, unlike with
183// |innerText()|, the search depth and breadth are limited to a fixed threshold.
184// Whitespace is trimmed from text accumulated at descendant nodes.
185base::string16 FindChildText(const WebNode& node) {
186  if (node.isTextNode())
187    return node.nodeValue();
188
189  WebNode child = node.firstChild();
190
191  const int kChildSearchDepth = 10;
192  base::string16 node_text = FindChildTextInner(child, kChildSearchDepth);
193  TrimWhitespace(node_text, TRIM_ALL, &node_text);
194  return node_text;
195}
196
197// Helper for |InferLabelForElement()| that infers a label, if possible, from
198// a previous sibling of |element|,
199// e.g. Some Text <input ...>
200// or   Some <span>Text</span> <input ...>
201// or   <p>Some Text</p><input ...>
202// or   <label>Some Text</label> <input ...>
203// or   Some Text <img><input ...>
204// or   <b>Some Text</b><br/> <input ...>.
205base::string16 InferLabelFromPrevious(const WebFormControlElement& element) {
206  base::string16 inferred_label;
207  WebNode previous = element;
208  while (true) {
209    previous = previous.previousSibling();
210    if (previous.isNull())
211      break;
212
213    // Skip over comments.
214    WebNode::NodeType node_type = previous.nodeType();
215    if (node_type == WebNode::CommentNode)
216      continue;
217
218    // Otherwise, only consider normal HTML elements and their contents.
219    if (node_type != WebNode::TextNode &&
220        node_type != WebNode::ElementNode)
221      break;
222
223    // A label might be split across multiple "lightweight" nodes.
224    // Coalesce any text contained in multiple consecutive
225    //  (a) plain text nodes or
226    //  (b) inline HTML elements that are essentially equivalent to text nodes.
227    CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b"));
228    CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong"));
229    CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span"));
230    CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font"));
231    if (previous.isTextNode() ||
232        HasTagName(previous, kBold) || HasTagName(previous, kStrong) ||
233        HasTagName(previous, kSpan) || HasTagName(previous, kFont)) {
234      base::string16 value = FindChildText(previous);
235      // A text node's value will be empty if it is for a line break.
236      bool add_space = previous.isTextNode() && value.empty();
237      inferred_label =
238          CombineAndCollapseWhitespace(value, inferred_label, add_space);
239      continue;
240    }
241
242    // If we have identified a partial label and have reached a non-lightweight
243    // element, consider the label to be complete.
244    base::string16 trimmed_label;
245    TrimWhitespace(inferred_label, TRIM_ALL, &trimmed_label);
246    if (!trimmed_label.empty())
247      break;
248
249    // <img> and <br> tags often appear between the input element and its
250    // label text, so skip over them.
251    CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img"));
252    CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br"));
253    if (HasTagName(previous, kImage) || HasTagName(previous, kBreak))
254      continue;
255
256    // We only expect <p> and <label> tags to contain the full label text.
257    CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p"));
258    CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
259    if (HasTagName(previous, kPage) || HasTagName(previous, kLabel))
260      inferred_label = FindChildText(previous);
261
262    break;
263  }
264
265  TrimWhitespace(inferred_label, TRIM_ALL, &inferred_label);
266  return inferred_label;
267}
268
269// Helper for |InferLabelForElement()| that infers a label, if possible, from
270// enclosing list item,
271// e.g. <li>Some Text<input ...><input ...><input ...></tr>
272base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
273  WebNode parent = element.parentNode();
274  CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
275  while (!parent.isNull() && parent.isElementNode() &&
276         !parent.to<WebElement>().hasTagName(kListItem)) {
277    parent = parent.parentNode();
278  }
279
280  if (!parent.isNull() && HasTagName(parent, kListItem))
281    return FindChildText(parent);
282
283  return base::string16();
284}
285
286// Helper for |InferLabelForElement()| that infers a label, if possible, from
287// surrounding table structure,
288// e.g. <tr><td>Some Text</td><td><input ...></td></tr>
289// or   <tr><th>Some Text</th><td><input ...></td></tr>
290// or   <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
291// or   <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
292base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
293  CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
294  WebNode parent = element.parentNode();
295  while (!parent.isNull() && parent.isElementNode() &&
296         !parent.to<WebElement>().hasTagName(kTableCell)) {
297    parent = parent.parentNode();
298  }
299
300  if (parent.isNull())
301    return base::string16();
302
303  // Check all previous siblings, skipping non-element nodes, until we find a
304  // non-empty text block.
305  base::string16 inferred_label;
306  WebNode previous = parent.previousSibling();
307  CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
308  while (inferred_label.empty() && !previous.isNull()) {
309    if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader))
310      inferred_label = FindChildText(previous);
311
312    previous = previous.previousSibling();
313  }
314
315  return inferred_label;
316}
317
318// Helper for |InferLabelForElement()| that infers a label, if possible, from
319// surrounding table structure,
320// e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
321base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
322  CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
323  WebNode parent = element.parentNode();
324  while (!parent.isNull() && parent.isElementNode() &&
325         !parent.to<WebElement>().hasTagName(kTableRow)) {
326    parent = parent.parentNode();
327  }
328
329  if (parent.isNull())
330    return base::string16();
331
332  // Check all previous siblings, skipping non-element nodes, until we find a
333  // non-empty text block.
334  base::string16 inferred_label;
335  WebNode previous = parent.previousSibling();
336  while (inferred_label.empty() && !previous.isNull()) {
337    if (HasTagName(previous, kTableRow))
338      inferred_label = FindChildText(previous);
339
340    previous = previous.previousSibling();
341  }
342
343  return inferred_label;
344}
345
346// Helper for |InferLabelForElement()| that infers a label, if possible, from
347// a surrounding div table,
348// e.g. <div>Some Text<span><input ...></span></div>
349// e.g. <div>Some Text</div><div><input ...></div>
350base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
351  WebNode node = element.parentNode();
352  bool looking_for_parent = true;
353
354  // Search the sibling and parent <div>s until we find a candidate label.
355  base::string16 inferred_label;
356  CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
357  CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table"));
358  CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
359  while (inferred_label.empty() && !node.isNull()) {
360    if (HasTagName(node, kDiv)) {
361      looking_for_parent = false;
362      inferred_label = FindChildText(node);
363    } else if (looking_for_parent &&
364               (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) {
365      // If the element is in a table or fieldset, its label most likely is too.
366      break;
367    }
368
369    if (node.previousSibling().isNull()) {
370      // If there are no more siblings, continue walking up the tree.
371      looking_for_parent = true;
372    }
373
374    if (looking_for_parent)
375      node = node.parentNode();
376    else
377      node = node.previousSibling();
378  }
379
380  return inferred_label;
381}
382
383// Helper for |InferLabelForElement()| that infers a label, if possible, from
384// a surrounding definition list,
385// e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
386// e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
387base::string16 InferLabelFromDefinitionList(
388    const WebFormControlElement& element) {
389  CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
390  WebNode parent = element.parentNode();
391  while (!parent.isNull() && parent.isElementNode() &&
392         !parent.to<WebElement>().hasTagName(kDefinitionData))
393    parent = parent.parentNode();
394
395  if (parent.isNull() || !HasTagName(parent, kDefinitionData))
396    return base::string16();
397
398  // Skip by any intervening text nodes.
399  WebNode previous = parent.previousSibling();
400  while (!previous.isNull() && previous.isTextNode())
401    previous = previous.previousSibling();
402
403  CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
404  if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
405    return base::string16();
406
407  return FindChildText(previous);
408}
409
410// Infers corresponding label for |element| from surrounding context in the DOM,
411// e.g. the contents of the preceding <p> tag or text element.
412base::string16 InferLabelForElement(const WebFormControlElement& element) {
413  base::string16 inferred_label = InferLabelFromPrevious(element);
414  if (!inferred_label.empty())
415    return inferred_label;
416
417  // If we didn't find a label, check for list item case.
418  inferred_label = InferLabelFromListItem(element);
419  if (!inferred_label.empty())
420    return inferred_label;
421
422  // If we didn't find a label, check for table cell case.
423  inferred_label = InferLabelFromTableColumn(element);
424  if (!inferred_label.empty())
425    return inferred_label;
426
427  // If we didn't find a label, check for table row case.
428  inferred_label = InferLabelFromTableRow(element);
429  if (!inferred_label.empty())
430    return inferred_label;
431
432  // If we didn't find a label, check for definition list case.
433  inferred_label = InferLabelFromDefinitionList(element);
434  if (!inferred_label.empty())
435    return inferred_label;
436
437  // If we didn't find a label, check for div table case.
438  return InferLabelFromDivTable(element);
439}
440
441// Fills |option_strings| with the values of the <option> elements present in
442// |select_element|.
443void GetOptionStringsFromElement(const WebSelectElement& select_element,
444                                 std::vector<base::string16>* option_values,
445                                 std::vector<base::string16>* option_contents) {
446  DCHECK(!select_element.isNull());
447
448  option_values->clear();
449  option_contents->clear();
450  WebVector<WebElement> list_items = select_element.listItems();
451  option_values->reserve(list_items.size());
452  option_contents->reserve(list_items.size());
453  for (size_t i = 0; i < list_items.size(); ++i) {
454    if (IsOptionElement(list_items[i])) {
455      const WebOptionElement option = list_items[i].toConst<WebOptionElement>();
456      option_values->push_back(option.value());
457      option_contents->push_back(option.text());
458    }
459  }
460}
461
// The callback type used by |ForEachMatchingFormField()|.  It is invoked once
// per matching field with, in order: the field data taken from the FormData
// being applied, whether the control is the element that initiated the
// operation, and a pointer to the form control to act on.
typedef void (*Callback)(const FormFieldData&,
                         bool, /* is_initiating_element */
                         blink::WebFormControlElement*);
466
467// For each autofillable field in |data| that matches a field in the |form|,
468// the |callback| is invoked with the corresponding |form| field data.
469void ForEachMatchingFormField(const WebFormElement& form_element,
470                              const WebElement& initiating_element,
471                              const FormData& data,
472                              FieldFilterMask filters,
473                              bool force_override,
474                              Callback callback) {
475  std::vector<WebFormControlElement> control_elements;
476  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
477                              &control_elements);
478
479  if (control_elements.size() != data.fields.size()) {
480    // This case should be reachable only for pathological websites and tests,
481    // which add or remove form fields while the user is interacting with the
482    // Autofill popup.
483    return;
484  }
485
486  // It's possible that the site has injected fields into the form after the
487  // page has loaded, so we can't assert that the size of the cached control
488  // elements is equal to the size of the fields in |form|.  Fortunately, the
489  // one case in the wild where this happens, paypal.com signup form, the fields
490  // are appended to the end of the form and are not visible.
491  for (size_t i = 0; i < control_elements.size(); ++i) {
492    WebFormControlElement* element = &control_elements[i];
493
494    if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
495      // This case should be reachable only for pathological websites, which
496      // rename form fields while the user is interacting with the Autofill
497      // popup.  I (isherman) am not aware of any such websites, and so am
498      // optimistically including a NOTREACHED().  If you ever trip this check,
499      // please file a bug against me.
500      NOTREACHED();
501      continue;
502    }
503
504    bool is_initiating_element = (*element == initiating_element);
505
506    // Only autofill empty fields and the field that initiated the filling,
507    // i.e. the field the user is currently editing and interacting with.
508    const WebInputElement* input_element = toWebInputElement(element);
509    if (!force_override && !is_initiating_element &&
510        ((IsAutofillableInputElement(input_element) &&
511          !input_element->value().isEmpty()) ||
512         (IsTextAreaElement(*element) &&
513          !element->toConst<WebTextAreaElement>().value().isEmpty())))
514      continue;
515
516    if (((filters & FILTER_DISABLED_ELEMENTS) && !element->isEnabled()) ||
517        ((filters & FILTER_READONLY_ELEMENTS) && element->isReadOnly()) ||
518        ((filters & FILTER_NON_FOCUSABLE_ELEMENTS) && !element->isFocusable()))
519      continue;
520
521    callback(data.fields[i], is_initiating_element, element);
522  }
523}
524
525// Sets the |field|'s value to the value in |data|.
526// Also sets the "autofilled" attribute, causing the background to be yellow.
527void FillFormField(const FormFieldData& data,
528                   bool is_initiating_node,
529                   blink::WebFormControlElement* field) {
530  // Nothing to fill.
531  if (data.value.empty())
532    return;
533
534  field->setAutofilled(true);
535
536  WebInputElement* input_element = toWebInputElement(field);
537  if (IsTextInput(input_element) || IsMonthInput(input_element)) {
538    // If the maxlength attribute contains a negative value, maxLength()
539    // returns the default maxlength value.
540    input_element->setValue(
541        data.value.substr(0, input_element->maxLength()), true);
542    if (is_initiating_node) {
543      int length = input_element->value().length();
544      input_element->setSelectionRange(length, length);
545      // Clear the current IME composition (the underline), if there is one.
546      input_element->document().frame()->unmarkText();
547    }
548  } else if (IsTextAreaElement(*field)) {
549    WebTextAreaElement text_area = field->to<WebTextAreaElement>();
550    if (text_area.value() != data.value) {
551      text_area.setValue(data.value);
552      text_area.dispatchFormControlChangeEvent();
553    }
554  } else if (IsSelectElement(*field)) {
555    WebSelectElement select_element = field->to<WebSelectElement>();
556    if (select_element.value() != data.value) {
557      select_element.setValue(data.value);
558      select_element.dispatchFormControlChangeEvent();
559    }
560  } else {
561    DCHECK(IsCheckableElement(input_element));
562    input_element->setChecked(data.is_checked, true);
563  }
564}
565
566// Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
567// Also sets the "autofilled" attribute, causing the background to be yellow.
568void PreviewFormField(const FormFieldData& data,
569                      bool is_initiating_node,
570                      blink::WebFormControlElement* field) {
571  // Nothing to preview.
572  if (data.value.empty())
573    return;
574
575  // Preview input and textarea fields. For input fields, excludes checkboxes
576  // and radio buttons, as there is no provision for setSuggestedCheckedValue
577  // in WebInputElement.
578  WebInputElement* input_element = toWebInputElement(field);
579  if (IsTextInput(input_element)) {
580    // If the maxlength attribute contains a negative value, maxLength()
581    // returns the default maxlength value.
582    input_element->setSuggestedValue(
583      data.value.substr(0, input_element->maxLength()));
584    input_element->setAutofilled(true);
585    if (is_initiating_node) {
586      // Select the part of the text that the user didn't type.
587      input_element->setSelectionRange(
588          input_element->value().length(),
589          input_element->suggestedValue().length());
590    }
591  } else if (IsTextAreaElement(*field)) {
592    WebTextAreaElement textarea = field->to<WebTextAreaElement>();
593    textarea.setSuggestedValue(data.value);
594    field->setAutofilled(true);
595  }
596}
597
598std::string RetrievalMethodToString(
599    const WebElementDescriptor::RetrievalMethod& method) {
600  switch (method) {
601    case WebElementDescriptor::CSS_SELECTOR:
602      return "CSS_SELECTOR";
603    case WebElementDescriptor::ID:
604      return "ID";
605    case WebElementDescriptor::NONE:
606      return "NONE";
607  }
608  NOTREACHED();
609  return "UNKNOWN";
610}
611
612// Recursively checks whether |node| or any of its children have a non-empty
613// bounding box. The recursion depth is bounded by |depth|.
614bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) {
615  if (depth < 0)
616    return false;
617  if (node.hasNonEmptyBoundingBox())
618    return true;
619
620  // The childNodes method is not a const method. Therefore it cannot be called
621  // on a const reference. Therefore we need a const cast.
622  const blink::WebNodeList& children =
623      const_cast<blink::WebNode&>(node).childNodes();
624  size_t length = children.length();
625  for (size_t i = 0; i < length; ++i) {
626    const blink::WebNode& item = children.item(i);
627    if (IsWebNodeVisibleImpl(item, depth - 1))
628      return true;
629  }
630  return false;
631}
632
633}  // namespace
634
// Judging by the name, an upper bound on the number of form fields that will
// be parsed; it is not referenced in the visible part of this file.
// NOTE(review): confirm the exact semantics against the declaration in
// form_autofill_util.h.
const size_t kMaxParseableFields = 200;
636
637bool IsMonthInput(const WebInputElement* element) {
638  CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month"));
639  return element && element->formControlType() == kMonth;
640}
641
642// All text fields, including password fields, should be extracted.
643bool IsTextInput(const WebInputElement* element) {
644  return element && element->isTextField();
645}
646
647bool IsSelectElement(const WebFormControlElement& element) {
648  // Static for improved performance.
649  CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
650  return element.formControlType() == kSelectOne;
651}
652
653bool IsTextAreaElement(const WebFormControlElement& element) {
654  // Static for improved performance.
655  CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea"));
656  return element.formControlType() == kTextArea;
657}
658
659bool IsCheckableElement(const WebInputElement* element) {
660  if (!element)
661    return false;
662
663  return element->isCheckbox() || element->isRadioButton();
664}
665
666bool IsAutofillableInputElement(const WebInputElement* element) {
667  return IsTextInput(element) ||
668         IsMonthInput(element) ||
669         IsCheckableElement(element);
670}
671
672const base::string16 GetFormIdentifier(const WebFormElement& form) {
673  base::string16 identifier = form.name();
674  CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
675  if (identifier.empty())
676    identifier = form.getAttribute(kId);
677
678  return identifier;
679}
680
681bool IsWebNodeVisible(const blink::WebNode& node) {
682  // In the bug http://crbug.com/237216 the form's bounding box is empty
683  // however the form has non empty children. Thus we need to look at the
684  // form's children.
685  int kNodeSearchDepth = 2;
686  return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
687}
688
689bool ClickElement(const WebDocument& document,
690                  const WebElementDescriptor& element_descriptor) {
691  WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor);
692  blink::WebElement element;
693
694  switch (element_descriptor.retrieval_method) {
695    case WebElementDescriptor::CSS_SELECTOR: {
696      WebExceptionCode ec = 0;
697      element = document.querySelector(web_descriptor, ec);
698      if (ec)
699        DVLOG(1) << "Query selector failed. Error code: " << ec << ".";
700      break;
701    }
702    case WebElementDescriptor::ID:
703      element = document.getElementById(web_descriptor);
704      break;
705    case WebElementDescriptor::NONE:
706      return true;
707  }
708
709  if (element.isNull()) {
710    DVLOG(1) << "Could not find "
711             << element_descriptor.descriptor
712             << " by "
713             << RetrievalMethodToString(element_descriptor.retrieval_method)
714             << ".";
715    return false;
716  }
717
718  element.simulateClick();
719  return true;
720}
721
722// Fills |autofillable_elements| with all the auto-fillable form control
723// elements in |form_element|.
724void ExtractAutofillableElements(
725    const WebFormElement& form_element,
726    RequirementsMask requirements,
727    std::vector<WebFormControlElement>* autofillable_elements) {
728  WebVector<WebFormControlElement> control_elements;
729  form_element.getFormControlElements(control_elements);
730
731  autofillable_elements->clear();
732  for (size_t i = 0; i < control_elements.size(); ++i) {
733    WebFormControlElement element = control_elements[i];
734    if (!IsAutofillableElement(element))
735      continue;
736
737    if (requirements & REQUIRE_AUTOCOMPLETE) {
738      // TODO(isherman): WebKit currently doesn't handle the autocomplete
739      // attribute for select or textarea elements, but it probably should.
740      WebInputElement* input_element = toWebInputElement(&control_elements[i]);
741      if (IsAutofillableInputElement(input_element) &&
742          !SatisfiesRequireAutocomplete(*input_element))
743        continue;
744    }
745
746    autofillable_elements->push_back(element);
747  }
748}
749
750void WebFormControlElementToFormField(const WebFormControlElement& element,
751                                      ExtractMask extract_mask,
752                                      FormFieldData* field) {
753  DCHECK(field);
754  DCHECK(!element.isNull());
755  CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));
756
757  // The label is not officially part of a WebFormControlElement; however, the
758  // labels for all form control elements are scraped from the DOM and set in
759  // WebFormElementToFormData.
760  field->name = element.nameForAutofill();
761  field->form_control_type = base::UTF16ToUTF8(element.formControlType());
762  field->autocomplete_attribute =
763      base::UTF16ToUTF8(element.getAttribute(kAutocomplete));
764  if (field->autocomplete_attribute.size() > kMaxDataLength) {
765    // Discard overly long attribute values to avoid DOS-ing the browser
766    // process.  However, send over a default string to indicate that the
767    // attribute was present.
768    field->autocomplete_attribute = "x-max-data-length-exceeded";
769  }
770
771  if (!IsAutofillableElement(element))
772    return;
773
774  const WebInputElement* input_element = toWebInputElement(&element);
775  if (IsAutofillableInputElement(input_element)) {
776    if (IsTextInput(input_element))
777      field->max_length = input_element->maxLength();
778
779    field->is_autofilled = input_element->isAutofilled();
780    field->is_focusable = input_element->isFocusable();
781    field->is_checkable = IsCheckableElement(input_element);
782    field->is_checked = input_element->isChecked();
783    field->should_autocomplete = input_element->autoComplete();
784    field->text_direction = input_element->directionForFormData() == "rtl" ?
785        base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
786  } else if (IsTextAreaElement(element)) {
787    // Nothing more to do in this case.
788  } else if (extract_mask & EXTRACT_OPTIONS) {
789    // Set option strings on the field if available.
790    DCHECK(IsSelectElement(element));
791    const WebSelectElement select_element = element.toConst<WebSelectElement>();
792    GetOptionStringsFromElement(select_element,
793                                &field->option_values,
794                                &field->option_contents);
795  }
796
797  if (!(extract_mask & EXTRACT_VALUE))
798    return;
799
800  base::string16 value;
801  if (IsAutofillableInputElement(input_element)) {
802    value = input_element->value();
803  } else if (IsTextAreaElement(element)) {
804    value = element.toConst<WebTextAreaElement>().value();
805  } else {
806    DCHECK(IsSelectElement(element));
807    const WebSelectElement select_element = element.toConst<WebSelectElement>();
808    value = select_element.value();
809
810    // Convert the |select_element| value to text if requested.
811    if (extract_mask & EXTRACT_OPTION_TEXT) {
812      WebVector<WebElement> list_items = select_element.listItems();
813      for (size_t i = 0; i < list_items.size(); ++i) {
814        if (IsOptionElement(list_items[i])) {
815          const WebOptionElement option_element =
816              list_items[i].toConst<WebOptionElement>();
817          if (option_element.value() == value) {
818            value = option_element.text();
819            break;
820          }
821        }
822      }
823    }
824  }
825
826  // Constrain the maximum data length to prevent a malicious site from DOS'ing
827  // the browser: http://crbug.com/49332
828  if (value.size() > kMaxDataLength)
829    value = value.substr(0, kMaxDataLength);
830
831  field->value = value;
832}
833
834bool WebFormElementToFormData(
835    const blink::WebFormElement& form_element,
836    const blink::WebFormControlElement& form_control_element,
837    RequirementsMask requirements,
838    ExtractMask extract_mask,
839    FormData* form,
840    FormFieldData* field) {
841  CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
842  CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
843  CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
844
845  const WebFrame* frame = form_element.document().frame();
846  if (!frame)
847    return false;
848
849  if (requirements & REQUIRE_AUTOCOMPLETE && !form_element.autoComplete())
850    return false;
851
852  form->name = GetFormIdentifier(form_element);
853  form->method = form_element.method();
854  form->origin = frame->document().url();
855  form->action = frame->document().completeURL(form_element.action());
856  form->user_submitted = form_element.wasUserSubmitted();
857
858  // If the completed URL is not valid, just use the action we get from
859  // WebKit.
860  if (!form->action.is_valid())
861    form->action = GURL(form_element.action());
862
863  // A map from a FormFieldData's name to the FormFieldData itself.
864  std::map<base::string16, FormFieldData*> name_map;
865
866  // The extracted FormFields.  We use pointers so we can store them in
867  // |name_map|.
868  ScopedVector<FormFieldData> form_fields;
869
870  WebVector<WebFormControlElement> control_elements;
871  form_element.getFormControlElements(control_elements);
872
873  // A vector of bools that indicate whether each field in the form meets the
874  // requirements and thus will be in the resulting |form|.
875  std::vector<bool> fields_extracted(control_elements.size(), false);
876
877  for (size_t i = 0; i < control_elements.size(); ++i) {
878    const WebFormControlElement& control_element = control_elements[i];
879
880    if (!IsAutofillableElement(control_element))
881      continue;
882
883    const WebInputElement* input_element = toWebInputElement(&control_element);
884    if (requirements & REQUIRE_AUTOCOMPLETE &&
885        IsAutofillableInputElement(input_element) &&
886        !SatisfiesRequireAutocomplete(*input_element))
887      continue;
888
889    // Create a new FormFieldData, fill it out and map it to the field's name.
890    FormFieldData* form_field = new FormFieldData;
891    WebFormControlElementToFormField(control_element, extract_mask, form_field);
892    form_fields.push_back(form_field);
893    // TODO(jhawkins): A label element is mapped to a form control element's id.
894    // field->name() will contain the id only if the name does not exist.  Add
895    // an id() method to WebFormControlElement and use that here.
896    name_map[form_field->name] = form_field;
897    fields_extracted[i] = true;
898  }
899
900  // If we failed to extract any fields, give up.  Also, to avoid overly
901  // expensive computation, we impose a maximum number of allowable fields.
902  if (form_fields.empty() || form_fields.size() > kMaxParseableFields)
903    return false;
904
905  // Loop through the label elements inside the form element.  For each label
906  // element, get the corresponding form control element, use the form control
907  // element's name as a key into the <name, FormFieldData> map to find the
908  // previously created FormFieldData and set the FormFieldData's label to the
909  // label.firstChild().nodeValue() of the label element.
910  WebElementCollection labels = form_element.getElementsByTagName(kLabel);
911  DCHECK(!labels.isNull());
912  for (WebElement item = labels.firstItem(); !item.isNull();
913       item = labels.nextItem()) {
914    WebLabelElement label = item.to<WebLabelElement>();
915    WebFormControlElement field_element =
916        label.correspondingControl().to<WebFormControlElement>();
917
918    base::string16 element_name;
919    if (field_element.isNull()) {
920      // Sometimes site authors will incorrectly specify the corresponding
921      // field element's name rather than its id, so we compensate here.
922      element_name = label.getAttribute(kFor);
923    } else if (
924        !field_element.isFormControlElement() ||
925        field_element.formControlType() == kHidden) {
926      continue;
927    } else {
928      element_name = field_element.nameForAutofill();
929    }
930
931    std::map<base::string16, FormFieldData*>::iterator iter =
932        name_map.find(element_name);
933    if (iter != name_map.end()) {
934      base::string16 label_text = FindChildText(label);
935
936      // Concatenate labels because some sites might have multiple label
937      // candidates.
938      if (!iter->second->label.empty() && !label_text.empty())
939        iter->second->label += base::ASCIIToUTF16(" ");
940      iter->second->label += label_text;
941    }
942  }
943
944  // Loop through the form control elements, extracting the label text from
945  // the DOM.  We use the |fields_extracted| vector to make sure we assign the
946  // extracted label to the correct field, as it's possible |form_fields| will
947  // not contain all of the elements in |control_elements|.
948  for (size_t i = 0, field_idx = 0;
949       i < control_elements.size() && field_idx < form_fields.size(); ++i) {
950    // This field didn't meet the requirements, so don't try to find a label
951    // for it.
952    if (!fields_extracted[i])
953      continue;
954
955    const WebFormControlElement& control_element = control_elements[i];
956    if (form_fields[field_idx]->label.empty())
957      form_fields[field_idx]->label = InferLabelForElement(control_element);
958
959    if (field && form_control_element == control_element)
960      *field = *form_fields[field_idx];
961
962    ++field_idx;
963  }
964
965  // Copy the created FormFields into the resulting FormData object.
966  for (ScopedVector<FormFieldData>::const_iterator iter = form_fields.begin();
967       iter != form_fields.end(); ++iter) {
968    form->fields.push_back(**iter);
969  }
970
971  return true;
972}
973
974bool FindFormAndFieldForInputElement(const WebInputElement& element,
975                                     FormData* form,
976                                     FormFieldData* field,
977                                     RequirementsMask requirements) {
978  if (!IsAutofillableElement(element))
979    return false;
980
981  const WebFormElement form_element = element.form();
982  if (form_element.isNull())
983    return false;
984
985  ExtractMask extract_mask =
986      static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
987  return WebFormElementToFormData(form_element,
988                                  element,
989                                  requirements,
990                                  extract_mask,
991                                  form,
992                                  field);
993}
994
995void FillForm(const FormData& form, const WebInputElement& element) {
996  WebFormElement form_element = element.form();
997  if (form_element.isNull())
998    return;
999
1000  ForEachMatchingFormField(form_element,
1001                           element,
1002                           form,
1003                           FILTER_ALL_NON_EDITIABLE_ELEMENTS,
1004                           false, /* dont force override */
1005                           &FillFormField);
1006}
1007
1008void FillFormIncludingNonFocusableElements(const FormData& form_data,
1009                                           const WebFormElement& form_element) {
1010  if (form_element.isNull())
1011    return;
1012
1013  FieldFilterMask filter_mask = static_cast<FieldFilterMask>(
1014      FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS);
1015  ForEachMatchingFormField(form_element,
1016                           WebInputElement(),
1017                           form_data,
1018                           filter_mask,
1019                           true, /* force override */
1020                           &FillFormField);
1021}
1022
1023void FillFormForAllElements(const FormData& form_data,
1024                            const WebFormElement& form_element) {
1025  if (form_element.isNull())
1026    return;
1027
1028  ForEachMatchingFormField(form_element,
1029                           WebInputElement(),
1030                           form_data,
1031                           FILTER_NONE,
1032                           true, /* force override */
1033                           &FillFormField);
1034}
1035
1036void PreviewForm(const FormData& form, const WebInputElement& element) {
1037  WebFormElement form_element = element.form();
1038  if (form_element.isNull())
1039    return;
1040
1041  ForEachMatchingFormField(form_element,
1042                           element,
1043                           form,
1044                           FILTER_ALL_NON_EDITIABLE_ELEMENTS,
1045                           false, /* dont force override */
1046                           &PreviewFormField);
1047}
1048
1049bool ClearPreviewedFormWithElement(const WebInputElement& element,
1050                                   bool was_autofilled) {
1051  WebFormElement form_element = element.form();
1052  if (form_element.isNull())
1053    return false;
1054
1055  std::vector<WebFormControlElement> control_elements;
1056  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1057                              &control_elements);
1058  for (size_t i = 0; i < control_elements.size(); ++i) {
1059    // There might be unrelated elements in this form which have already been
1060    // auto-filled.  For example, the user might have already filled the address
1061    // part of a form and now be dealing with the credit card section.  We only
1062    // want to reset the auto-filled status for fields that were previewed.
1063    WebFormControlElement control_element = control_elements[i];
1064
1065    // Only text input and textarea elements can be previewed.
1066    WebInputElement* input_element = toWebInputElement(&control_element);
1067    if (!IsTextInput(input_element) && !IsTextAreaElement(control_element))
1068      continue;
1069
1070    // If the element is not auto-filled, we did not preview it,
1071    // so there is nothing to reset.
1072    if(!control_element.isAutofilled())
1073      continue;
1074
1075    if ((IsTextInput(input_element) &&
1076         input_element->suggestedValue().isEmpty()) ||
1077        (IsTextAreaElement(control_element) &&
1078         control_element.to<WebTextAreaElement>().suggestedValue().isEmpty()))
1079      continue;
1080
1081    // Clear the suggested value. For the initiating node, also restore the
1082    // original value.
1083    if (IsTextInput(input_element)) {
1084      input_element->setSuggestedValue(WebString());
1085      bool is_initiating_node = (element == *input_element);
1086      if (is_initiating_node)
1087        input_element->setAutofilled(was_autofilled);
1088      else
1089        input_element->setAutofilled(false);
1090
1091      // Clearing the suggested value in the focused node (above) can cause
1092      // selection to be lost. We force selection range to restore the text
1093      // cursor.
1094      if (is_initiating_node) {
1095        int length = input_element->value().length();
1096        input_element->setSelectionRange(length, length);
1097      }
1098    } else if (IsTextAreaElement(control_element)) {
1099      WebTextAreaElement text_area = control_element.to<WebTextAreaElement>();
1100      text_area.setSuggestedValue(WebString());
1101      bool is_initiating_node = (element == text_area);
1102      if (is_initiating_node)
1103        control_element.setAutofilled(was_autofilled);
1104      else
1105        control_element.setAutofilled(false);
1106    }
1107  }
1108
1109  return true;
1110}
1111
1112bool FormWithElementIsAutofilled(const WebInputElement& element) {
1113  WebFormElement form_element = element.form();
1114  if (form_element.isNull())
1115    return false;
1116
1117  std::vector<WebFormControlElement> control_elements;
1118  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1119                              &control_elements);
1120  for (size_t i = 0; i < control_elements.size(); ++i) {
1121    WebInputElement* input_element = toWebInputElement(&control_elements[i]);
1122    if (!IsAutofillableInputElement(input_element))
1123      continue;
1124
1125    if (input_element->isAutofilled())
1126      return true;
1127  }
1128
1129  return false;
1130}
1131
1132bool IsWebpageEmpty(const blink::WebFrame* frame) {
1133  blink::WebDocument document = frame->document();
1134
1135  return IsWebElementEmpty(document.head()) &&
1136         IsWebElementEmpty(document.body());
1137}
1138
1139bool IsWebElementEmpty(const blink::WebElement& element) {
1140  // This array contains all tags which can be present in an empty page.
1141  const char* const kAllowedValue[] = {
1142    "script",
1143    "meta",
1144    "title",
1145  };
1146  const size_t kAllowedValueLength = arraysize(kAllowedValue);
1147
1148  if (element.isNull())
1149    return true;
1150  // The childNodes method is not a const method. Therefore it cannot be called
1151  // on a const reference. Therefore we need a const cast.
1152  const blink::WebNodeList& children =
1153      const_cast<blink::WebElement&>(element).childNodes();
1154  for (size_t i = 0; i < children.length(); ++i) {
1155    const blink::WebNode& item = children.item(i);
1156
1157    if (item.isTextNode() &&
1158        !ContainsOnlyWhitespaceASCII(item.nodeValue().utf8()))
1159      return false;
1160
1161    // We ignore all other items with names which begin with
1162    // the character # because they are not html tags.
1163    if (item.nodeName().utf8()[0] == '#')
1164      continue;
1165
1166    bool tag_is_allowed = false;
1167    // Test if the item name is in the kAllowedValue array
1168    for (size_t allowed_value_index = 0;
1169         allowed_value_index < kAllowedValueLength; ++allowed_value_index) {
1170      if (HasTagName(item,
1171                     WebString::fromUTF8(kAllowedValue[allowed_value_index]))) {
1172        tag_is_allowed = true;
1173        break;
1174      }
1175    }
1176    if (!tag_is_allowed)
1177      return false;
1178  }
1179  return true;
1180}
1181
1182gfx::RectF GetScaledBoundingBox(float scale, WebInputElement* element) {
1183  gfx::Rect bounding_box(element->boundsInViewportSpace());
1184  return gfx::RectF(bounding_box.x() * scale,
1185                    bounding_box.y() * scale,
1186                    bounding_box.width() * scale,
1187                    bounding_box.height() * scale);
1188}
1189
1190}  // namespace autofill
1191