1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/autofill/content/renderer/form_autofill_util.h"
6
7#include <map>
8
9#include "base/command_line.h"
10#include "base/logging.h"
11#include "base/memory/scoped_vector.h"
12#include "base/metrics/field_trial.h"
13#include "base/strings/string_util.h"
14#include "base/strings/utf_string_conversions.h"
15#include "components/autofill/core/common/autofill_data_validation.h"
16#include "components/autofill/core/common/autofill_switches.h"
17#include "components/autofill/core/common/form_data.h"
18#include "components/autofill/core/common/form_field_data.h"
19#include "components/autofill/core/common/web_element_descriptor.h"
20#include "third_party/WebKit/public/platform/WebString.h"
21#include "third_party/WebKit/public/platform/WebVector.h"
22#include "third_party/WebKit/public/web/WebDocument.h"
23#include "third_party/WebKit/public/web/WebElement.h"
24#include "third_party/WebKit/public/web/WebElementCollection.h"
25#include "third_party/WebKit/public/web/WebExceptionCode.h"
26#include "third_party/WebKit/public/web/WebFormControlElement.h"
27#include "third_party/WebKit/public/web/WebFormElement.h"
28#include "third_party/WebKit/public/web/WebInputElement.h"
29#include "third_party/WebKit/public/web/WebLabelElement.h"
30#include "third_party/WebKit/public/web/WebLocalFrame.h"
31#include "third_party/WebKit/public/web/WebNode.h"
32#include "third_party/WebKit/public/web/WebNodeList.h"
33#include "third_party/WebKit/public/web/WebOptionElement.h"
34#include "third_party/WebKit/public/web/WebSelectElement.h"
35#include "third_party/WebKit/public/web/WebTextAreaElement.h"
36
37using blink::WebDocument;
38using blink::WebElement;
39using blink::WebElementCollection;
40using blink::WebExceptionCode;
41using blink::WebFormControlElement;
42using blink::WebFormElement;
43using blink::WebFrame;
44using blink::WebInputElement;
45using blink::WebLabelElement;
46using blink::WebNode;
47using blink::WebNodeList;
48using blink::WebOptionElement;
49using blink::WebSelectElement;
50using blink::WebTextAreaElement;
51using blink::WebString;
52using blink::WebVector;
53
54namespace autofill {
55namespace {
56
57bool IsOptionElement(const WebElement& element) {
58  CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option"));
59  return element.hasHTMLTagName(kOption);
60}
61
62bool IsScriptElement(const WebElement& element) {
63  CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script"));
64  return element.hasHTMLTagName(kScript);
65}
66
67bool IsNoScriptElement(const WebElement& element) {
68  CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript"));
69  return element.hasHTMLTagName(kNoScript);
70}
71
72bool HasTagName(const WebNode& node, const blink::WebString& tag) {
73  return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag);
74}
75
76bool IsAutofillableElement(const WebFormControlElement& element) {
77  const WebInputElement* input_element = toWebInputElement(&element);
78  return IsAutofillableInputElement(input_element) ||
79         IsSelectElement(element) ||
80         IsTextAreaElement(element);
81}
82
83// Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement.
84bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) {
85  return input_element.autoComplete();
86}
87
88// Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
89// to a single space.  If |force_whitespace| is true, then the resulting string
90// is guaranteed to have a space between |prefix| and |suffix|.  Otherwise, the
91// result includes a space only if |prefix| has trailing whitespace or |suffix|
92// has leading whitespace.
93// A few examples:
94//  * CombineAndCollapseWhitespace("foo", "bar", false)       -> "foobar"
95//  * CombineAndCollapseWhitespace("foo", "bar", true)        -> "foo bar"
96//  * CombineAndCollapseWhitespace("foo ", "bar", false)      -> "foo bar"
97//  * CombineAndCollapseWhitespace("foo", " bar", false)      -> "foo bar"
98//  * CombineAndCollapseWhitespace("foo", " bar", true)       -> "foo bar"
99//  * CombineAndCollapseWhitespace("foo   ", "   bar", false) -> "foo bar"
100//  * CombineAndCollapseWhitespace(" foo", "bar ", false)     -> " foobar "
101//  * CombineAndCollapseWhitespace(" foo", "bar ", true)      -> " foo bar "
102const base::string16 CombineAndCollapseWhitespace(
103    const base::string16& prefix,
104    const base::string16& suffix,
105    bool force_whitespace) {
106  base::string16 prefix_trimmed;
107  base::TrimPositions prefix_trailing_whitespace =
108      base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed);
109
110  // Recursively compute the children's text.
111  base::string16 suffix_trimmed;
112  base::TrimPositions suffix_leading_whitespace =
113      base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed);
114
115  if (prefix_trailing_whitespace || suffix_leading_whitespace ||
116      force_whitespace) {
117    return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed;
118  } else {
119    return prefix_trimmed + suffix_trimmed;
120  }
121}
122
123// This is a helper function for the FindChildText() function (see below).
124// Search depth is limited with the |depth| parameter.
125base::string16 FindChildTextInner(const WebNode& node, int depth) {
126  if (depth <= 0 || node.isNull())
127    return base::string16();
128
129  // Skip over comments.
130  if (node.nodeType() == WebNode::CommentNode)
131    return FindChildTextInner(node.nextSibling(), depth - 1);
132
133  if (node.nodeType() != WebNode::ElementNode &&
134      node.nodeType() != WebNode::TextNode)
135    return base::string16();
136
137  // Ignore elements known not to contain inferable labels.
138  if (node.isElementNode()) {
139    const WebElement element = node.toConst<WebElement>();
140    if (IsOptionElement(element) ||
141        IsScriptElement(element) ||
142        IsNoScriptElement(element) ||
143        (element.isFormControlElement() &&
144         IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
145      return base::string16();
146    }
147  }
148
149  // Extract the text exactly at this node.
150  base::string16 node_text = node.nodeValue();
151
152  // Recursively compute the children's text.
153  // Preserve inter-element whitespace separation.
154  base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
155  bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
156  node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
157
158  // Recursively compute the siblings' text.
159  // Again, preserve inter-element whitespace separation.
160  base::string16 sibling_text =
161      FindChildTextInner(node.nextSibling(), depth - 1);
162  add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
163  node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
164
165  return node_text;
166}
167
168// Returns the aggregated values of the descendants of |element| that are
169// non-empty text nodes.  This is a faster alternative to |innerText()| for
170// performance critical operations.  It does a full depth-first search so can be
171// used when the structure is not directly known.  However, unlike with
172// |innerText()|, the search depth and breadth are limited to a fixed threshold.
173// Whitespace is trimmed from text accumulated at descendant nodes.
174base::string16 FindChildText(const WebNode& node) {
175  if (node.isTextNode())
176    return node.nodeValue();
177
178  WebNode child = node.firstChild();
179
180  const int kChildSearchDepth = 10;
181  base::string16 node_text = FindChildTextInner(child, kChildSearchDepth);
182  base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
183  return node_text;
184}
185
186// Helper for |InferLabelForElement()| that infers a label, if possible, from
187// a previous sibling of |element|,
188// e.g. Some Text <input ...>
189// or   Some <span>Text</span> <input ...>
190// or   <p>Some Text</p><input ...>
191// or   <label>Some Text</label> <input ...>
192// or   Some Text <img><input ...>
193// or   <b>Some Text</b><br/> <input ...>.
194base::string16 InferLabelFromPrevious(const WebFormControlElement& element) {
195  base::string16 inferred_label;
196  WebNode previous = element;
197  while (true) {
198    previous = previous.previousSibling();
199    if (previous.isNull())
200      break;
201
202    // Skip over comments.
203    WebNode::NodeType node_type = previous.nodeType();
204    if (node_type == WebNode::CommentNode)
205      continue;
206
207    // Otherwise, only consider normal HTML elements and their contents.
208    if (node_type != WebNode::TextNode &&
209        node_type != WebNode::ElementNode)
210      break;
211
212    // A label might be split across multiple "lightweight" nodes.
213    // Coalesce any text contained in multiple consecutive
214    //  (a) plain text nodes or
215    //  (b) inline HTML elements that are essentially equivalent to text nodes.
216    CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b"));
217    CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong"));
218    CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span"));
219    CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font"));
220    if (previous.isTextNode() ||
221        HasTagName(previous, kBold) || HasTagName(previous, kStrong) ||
222        HasTagName(previous, kSpan) || HasTagName(previous, kFont)) {
223      base::string16 value = FindChildText(previous);
224      // A text node's value will be empty if it is for a line break.
225      bool add_space = previous.isTextNode() && value.empty();
226      inferred_label =
227          CombineAndCollapseWhitespace(value, inferred_label, add_space);
228      continue;
229    }
230
231    // If we have identified a partial label and have reached a non-lightweight
232    // element, consider the label to be complete.
233    base::string16 trimmed_label;
234    base::TrimWhitespace(inferred_label, base::TRIM_ALL, &trimmed_label);
235    if (!trimmed_label.empty())
236      break;
237
238    // <img> and <br> tags often appear between the input element and its
239    // label text, so skip over them.
240    CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img"));
241    CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br"));
242    if (HasTagName(previous, kImage) || HasTagName(previous, kBreak))
243      continue;
244
245    // We only expect <p> and <label> tags to contain the full label text.
246    CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p"));
247    CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
248    if (HasTagName(previous, kPage) || HasTagName(previous, kLabel))
249      inferred_label = FindChildText(previous);
250
251    break;
252  }
253
254  base::TrimWhitespace(inferred_label, base::TRIM_ALL, &inferred_label);
255  return inferred_label;
256}
257
258// Helper for |InferLabelForElement()| that infers a label, if possible, from
259// enclosing list item,
260// e.g. <li>Some Text<input ...><input ...><input ...></tr>
261base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
262  WebNode parent = element.parentNode();
263  CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
264  while (!parent.isNull() && parent.isElementNode() &&
265         !parent.to<WebElement>().hasHTMLTagName(kListItem)) {
266    parent = parent.parentNode();
267  }
268
269  if (!parent.isNull() && HasTagName(parent, kListItem))
270    return FindChildText(parent);
271
272  return base::string16();
273}
274
275// Helper for |InferLabelForElement()| that infers a label, if possible, from
276// surrounding table structure,
277// e.g. <tr><td>Some Text</td><td><input ...></td></tr>
278// or   <tr><th>Some Text</th><td><input ...></td></tr>
279// or   <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
280// or   <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
281base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
282  CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
283  WebNode parent = element.parentNode();
284  while (!parent.isNull() && parent.isElementNode() &&
285         !parent.to<WebElement>().hasHTMLTagName(kTableCell)) {
286    parent = parent.parentNode();
287  }
288
289  if (parent.isNull())
290    return base::string16();
291
292  // Check all previous siblings, skipping non-element nodes, until we find a
293  // non-empty text block.
294  base::string16 inferred_label;
295  WebNode previous = parent.previousSibling();
296  CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
297  while (inferred_label.empty() && !previous.isNull()) {
298    if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader))
299      inferred_label = FindChildText(previous);
300
301    previous = previous.previousSibling();
302  }
303
304  return inferred_label;
305}
306
307// Helper for |InferLabelForElement()| that infers a label, if possible, from
308// surrounding table structure,
309// e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
310base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
311  CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
312  WebNode parent = element.parentNode();
313  while (!parent.isNull() && parent.isElementNode() &&
314         !parent.to<WebElement>().hasHTMLTagName(kTableRow)) {
315    parent = parent.parentNode();
316  }
317
318  if (parent.isNull())
319    return base::string16();
320
321  // Check all previous siblings, skipping non-element nodes, until we find a
322  // non-empty text block.
323  base::string16 inferred_label;
324  WebNode previous = parent.previousSibling();
325  while (inferred_label.empty() && !previous.isNull()) {
326    if (HasTagName(previous, kTableRow))
327      inferred_label = FindChildText(previous);
328
329    previous = previous.previousSibling();
330  }
331
332  return inferred_label;
333}
334
335// Helper for |InferLabelForElement()| that infers a label, if possible, from
336// a surrounding div table,
337// e.g. <div>Some Text<span><input ...></span></div>
338// e.g. <div>Some Text</div><div><input ...></div>
339base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
340  WebNode node = element.parentNode();
341  bool looking_for_parent = true;
342
343  // Search the sibling and parent <div>s until we find a candidate label.
344  base::string16 inferred_label;
345  CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
346  CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table"));
347  CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
348  while (inferred_label.empty() && !node.isNull()) {
349    if (HasTagName(node, kDiv)) {
350      looking_for_parent = false;
351      inferred_label = FindChildText(node);
352    } else if (looking_for_parent &&
353               (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) {
354      // If the element is in a table or fieldset, its label most likely is too.
355      break;
356    }
357
358    if (node.previousSibling().isNull()) {
359      // If there are no more siblings, continue walking up the tree.
360      looking_for_parent = true;
361    }
362
363    if (looking_for_parent)
364      node = node.parentNode();
365    else
366      node = node.previousSibling();
367  }
368
369  return inferred_label;
370}
371
372// Helper for |InferLabelForElement()| that infers a label, if possible, from
373// a surrounding definition list,
374// e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
375// e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
376base::string16 InferLabelFromDefinitionList(
377    const WebFormControlElement& element) {
378  CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
379  WebNode parent = element.parentNode();
380  while (!parent.isNull() && parent.isElementNode() &&
381         !parent.to<WebElement>().hasHTMLTagName(kDefinitionData))
382    parent = parent.parentNode();
383
384  if (parent.isNull() || !HasTagName(parent, kDefinitionData))
385    return base::string16();
386
387  // Skip by any intervening text nodes.
388  WebNode previous = parent.previousSibling();
389  while (!previous.isNull() && previous.isTextNode())
390    previous = previous.previousSibling();
391
392  CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
393  if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
394    return base::string16();
395
396  return FindChildText(previous);
397}
398
399// Infers corresponding label for |element| from surrounding context in the DOM,
400// e.g. the contents of the preceding <p> tag or text element.
401base::string16 InferLabelForElement(const WebFormControlElement& element) {
402  base::string16 inferred_label = InferLabelFromPrevious(element);
403  if (!inferred_label.empty())
404    return inferred_label;
405
406  // If we didn't find a label, check for list item case.
407  inferred_label = InferLabelFromListItem(element);
408  if (!inferred_label.empty())
409    return inferred_label;
410
411  // If we didn't find a label, check for table cell case.
412  inferred_label = InferLabelFromTableColumn(element);
413  if (!inferred_label.empty())
414    return inferred_label;
415
416  // If we didn't find a label, check for table row case.
417  inferred_label = InferLabelFromTableRow(element);
418  if (!inferred_label.empty())
419    return inferred_label;
420
421  // If we didn't find a label, check for definition list case.
422  inferred_label = InferLabelFromDefinitionList(element);
423  if (!inferred_label.empty())
424    return inferred_label;
425
426  // If we didn't find a label, check for div table case.
427  return InferLabelFromDivTable(element);
428}
429
430// Fills |option_strings| with the values of the <option> elements present in
431// |select_element|.
432void GetOptionStringsFromElement(const WebSelectElement& select_element,
433                                 std::vector<base::string16>* option_values,
434                                 std::vector<base::string16>* option_contents) {
435  DCHECK(!select_element.isNull());
436
437  option_values->clear();
438  option_contents->clear();
439  WebVector<WebElement> list_items = select_element.listItems();
440
441  // Constrain the maximum list length to prevent a malicious site from DOS'ing
442  // the browser, without entirely breaking autocomplete for some extreme
443  // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094
444  if (list_items.size() > kMaxListSize)
445    return;
446
447  option_values->reserve(list_items.size());
448  option_contents->reserve(list_items.size());
449  for (size_t i = 0; i < list_items.size(); ++i) {
450    if (IsOptionElement(list_items[i])) {
451      const WebOptionElement option = list_items[i].toConst<WebOptionElement>();
452      option_values->push_back(option.value());
453      option_contents->push_back(option.text());
454    }
455  }
456}
457
// The callback type used by |ForEachMatchingFormField()|.  It is invoked once
// per matching field with the field's data, a flag telling whether the element
// is the one that initiated the operation, and the form control element
// itself.
typedef void (*Callback)(const FormFieldData&,
                         bool, /* is_initiating_element */
                         blink::WebFormControlElement*);
462
463// For each autofillable field in |data| that matches a field in the |form|,
464// the |callback| is invoked with the corresponding |form| field data.
465void ForEachMatchingFormField(const WebFormElement& form_element,
466                              const WebElement& initiating_element,
467                              const FormData& data,
468                              bool only_focusable_elements,
469                              bool force_override,
470                              Callback callback) {
471  std::vector<WebFormControlElement> control_elements;
472  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
473                              &control_elements);
474
475  if (control_elements.size() != data.fields.size()) {
476    // This case should be reachable only for pathological websites and tests,
477    // which add or remove form fields while the user is interacting with the
478    // Autofill popup.
479    return;
480  }
481
482  // It's possible that the site has injected fields into the form after the
483  // page has loaded, so we can't assert that the size of the cached control
484  // elements is equal to the size of the fields in |form|.  Fortunately, the
485  // one case in the wild where this happens, paypal.com signup form, the fields
486  // are appended to the end of the form and are not visible.
487  for (size_t i = 0; i < control_elements.size(); ++i) {
488    WebFormControlElement* element = &control_elements[i];
489
490    if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
491      // This case should be reachable only for pathological websites, which
492      // rename form fields while the user is interacting with the Autofill
493      // popup.  I (isherman) am not aware of any such websites, and so am
494      // optimistically including a NOTREACHED().  If you ever trip this check,
495      // please file a bug against me.
496      NOTREACHED();
497      continue;
498    }
499
500    bool is_initiating_element = (*element == initiating_element);
501
502    // Only autofill empty fields and the field that initiated the filling,
503    // i.e. the field the user is currently editing and interacting with.
504    const WebInputElement* input_element = toWebInputElement(element);
505    if (!force_override && !is_initiating_element &&
506        ((IsAutofillableInputElement(input_element) ||
507          IsTextAreaElement(*element)) &&
508         !element->value().isEmpty()))
509      continue;
510
511    if (!element->isEnabled() || element->isReadOnly() ||
512        (only_focusable_elements && !element->isFocusable()))
513      continue;
514
515    callback(data.fields[i], is_initiating_element, element);
516  }
517}
518
519// Sets the |field|'s value to the value in |data|.
520// Also sets the "autofilled" attribute, causing the background to be yellow.
521void FillFormField(const FormFieldData& data,
522                   bool is_initiating_node,
523                   blink::WebFormControlElement* field) {
524  // Nothing to fill.
525  if (data.value.empty())
526    return;
527
528  if (!data.is_autofilled)
529    return;
530
531  WebInputElement* input_element = toWebInputElement(field);
532  if (IsCheckableElement(input_element)) {
533    input_element->setChecked(data.is_checked, true);
534  } else {
535    base::string16 value = data.value;
536    if (IsTextInput(input_element) || IsMonthInput(input_element)) {
537      // If the maxlength attribute contains a negative value, maxLength()
538      // returns the default maxlength value.
539      value = value.substr(0, input_element->maxLength());
540    }
541    field->setValue(value, true);
542  }
543
544  field->setAutofilled(true);
545
546  if (is_initiating_node &&
547      ((IsTextInput(input_element) || IsMonthInput(input_element)) ||
548       IsTextAreaElement(*field))) {
549    int length = field->value().length();
550    field->setSelectionRange(length, length);
551    // Clear the current IME composition (the underline), if there is one.
552    field->document().frame()->unmarkText();
553  }
554}
555
556// Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
557// Also sets the "autofilled" attribute, causing the background to be yellow.
558void PreviewFormField(const FormFieldData& data,
559                      bool is_initiating_node,
560                      blink::WebFormControlElement* field) {
561  // Nothing to preview.
562  if (data.value.empty())
563    return;
564
565  if (!data.is_autofilled)
566    return;
567
568  // Preview input, textarea and select fields. For input fields, excludes
569  // checkboxes and radio buttons, as there is no provision for
570  // setSuggestedCheckedValue in WebInputElement.
571  WebInputElement* input_element = toWebInputElement(field);
572  if (IsTextInput(input_element) || IsMonthInput(input_element)) {
573    // If the maxlength attribute contains a negative value, maxLength()
574    // returns the default maxlength value.
575    input_element->setSuggestedValue(
576      data.value.substr(0, input_element->maxLength()));
577    input_element->setAutofilled(true);
578  } else if (IsTextAreaElement(*field) || IsSelectElement(*field)) {
579    field->setSuggestedValue(data.value);
580    field->setAutofilled(true);
581  }
582
583  if (is_initiating_node &&
584      (IsTextInput(input_element) || IsTextAreaElement(*field))) {
585    // Select the part of the text that the user didn't type.
586    int start = field->value().length();
587    int end = field->suggestedValue().length();
588    field->setSelectionRange(start, end);
589  }
590}
591
592std::string RetrievalMethodToString(
593    const WebElementDescriptor::RetrievalMethod& method) {
594  switch (method) {
595    case WebElementDescriptor::CSS_SELECTOR:
596      return "CSS_SELECTOR";
597    case WebElementDescriptor::ID:
598      return "ID";
599    case WebElementDescriptor::NONE:
600      return "NONE";
601  }
602  NOTREACHED();
603  return "UNKNOWN";
604}
605
606// Recursively checks whether |node| or any of its children have a non-empty
607// bounding box. The recursion depth is bounded by |depth|.
608bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) {
609  if (depth < 0)
610    return false;
611  if (node.hasNonEmptyBoundingBox())
612    return true;
613
614  // The childNodes method is not a const method. Therefore it cannot be called
615  // on a const reference. Therefore we need a const cast.
616  const blink::WebNodeList& children =
617      const_cast<blink::WebNode&>(node).childNodes();
618  size_t length = children.length();
619  for (size_t i = 0; i < length; ++i) {
620    const blink::WebNode& item = children.item(i);
621    if (IsWebNodeVisibleImpl(item, depth - 1))
622      return true;
623  }
624  return false;
625}
626
627}  // namespace
628
// NOTE(review): not referenced in the visible part of this file; presumably
// the upper bound on the number of fields parsed per form -- confirm against
// the header declaration and its users.
const size_t kMaxParseableFields = 200;
630
631bool IsMonthInput(const WebInputElement* element) {
632  CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month"));
633  return element && !element->isNull() && element->formControlType() == kMonth;
634}
635
636// All text fields, including password fields, should be extracted.
637bool IsTextInput(const WebInputElement* element) {
638  return element && !element->isNull() && element->isTextField();
639}
640
641bool IsSelectElement(const WebFormControlElement& element) {
642  // Static for improved performance.
643  CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
644  return !element.isNull() && element.formControlType() == kSelectOne;
645}
646
647bool IsTextAreaElement(const WebFormControlElement& element) {
648  // Static for improved performance.
649  CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea"));
650  return !element.isNull() && element.formControlType() == kTextArea;
651}
652
653bool IsCheckableElement(const WebInputElement* element) {
654  if (!element || element->isNull())
655    return false;
656
657  return element->isCheckbox() || element->isRadioButton();
658}
659
660bool IsAutofillableInputElement(const WebInputElement* element) {
661  return IsTextInput(element) ||
662         IsMonthInput(element) ||
663         IsCheckableElement(element);
664}
665
666const base::string16 GetFormIdentifier(const WebFormElement& form) {
667  base::string16 identifier = form.name();
668  CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
669  if (identifier.empty())
670    identifier = form.getAttribute(kId);
671
672  return identifier;
673}
674
675bool IsWebNodeVisible(const blink::WebNode& node) {
676  // In the bug http://crbug.com/237216 the form's bounding box is empty
677  // however the form has non empty children. Thus we need to look at the
678  // form's children.
679  int kNodeSearchDepth = 2;
680  return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
681}
682
683bool ClickElement(const WebDocument& document,
684                  const WebElementDescriptor& element_descriptor) {
685  WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor);
686  blink::WebElement element;
687
688  switch (element_descriptor.retrieval_method) {
689    case WebElementDescriptor::CSS_SELECTOR: {
690      WebExceptionCode ec = 0;
691      element = document.querySelector(web_descriptor, ec);
692      if (ec)
693        DVLOG(1) << "Query selector failed. Error code: " << ec << ".";
694      break;
695    }
696    case WebElementDescriptor::ID:
697      element = document.getElementById(web_descriptor);
698      break;
699    case WebElementDescriptor::NONE:
700      return true;
701  }
702
703  if (element.isNull()) {
704    DVLOG(1) << "Could not find "
705             << element_descriptor.descriptor
706             << " by "
707             << RetrievalMethodToString(element_descriptor.retrieval_method)
708             << ".";
709    return false;
710  }
711
712  element.simulateClick();
713  return true;
714}
715
716// Fills |autofillable_elements| with all the auto-fillable form control
717// elements in |form_element|.
718void ExtractAutofillableElements(
719    const WebFormElement& form_element,
720    RequirementsMask requirements,
721    std::vector<WebFormControlElement>* autofillable_elements) {
722  WebVector<WebFormControlElement> control_elements;
723  form_element.getFormControlElements(control_elements);
724
725  autofillable_elements->clear();
726  for (size_t i = 0; i < control_elements.size(); ++i) {
727    WebFormControlElement element = control_elements[i];
728    if (!IsAutofillableElement(element))
729      continue;
730
731    if (requirements & REQUIRE_AUTOCOMPLETE) {
732      // TODO(isherman): WebKit currently doesn't handle the autocomplete
733      // attribute for select or textarea elements, but it probably should.
734      WebInputElement* input_element = toWebInputElement(&control_elements[i]);
735      if (IsAutofillableInputElement(input_element) &&
736          !SatisfiesRequireAutocomplete(*input_element))
737        continue;
738    }
739
740    autofillable_elements->push_back(element);
741  }
742}
743
744void WebFormControlElementToFormField(const WebFormControlElement& element,
745                                      ExtractMask extract_mask,
746                                      FormFieldData* field) {
747  DCHECK(field);
748  DCHECK(!element.isNull());
749  CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));
750
751  // The label is not officially part of a WebFormControlElement; however, the
752  // labels for all form control elements are scraped from the DOM and set in
753  // WebFormElementToFormData.
754  field->name = element.nameForAutofill();
755  field->form_control_type = base::UTF16ToUTF8(element.formControlType());
756  field->autocomplete_attribute =
757      base::UTF16ToUTF8(element.getAttribute(kAutocomplete));
758  if (field->autocomplete_attribute.size() > kMaxDataLength) {
759    // Discard overly long attribute values to avoid DOS-ing the browser
760    // process.  However, send over a default string to indicate that the
761    // attribute was present.
762    field->autocomplete_attribute = "x-max-data-length-exceeded";
763  }
764
765  if (!IsAutofillableElement(element))
766    return;
767
768  const WebInputElement* input_element = toWebInputElement(&element);
769  if (IsAutofillableInputElement(input_element) ||
770      IsTextAreaElement(element)) {
771    field->is_autofilled = element.isAutofilled();
772    field->is_focusable = element.isFocusable();
773    field->should_autocomplete = element.autoComplete();
774    field->text_direction = element.directionForFormData() ==
775        "rtl" ? base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
776  }
777
778  if (IsAutofillableInputElement(input_element)) {
779    if (IsTextInput(input_element))
780      field->max_length = input_element->maxLength();
781
782    field->is_checkable = IsCheckableElement(input_element);
783    field->is_checked = input_element->isChecked();
784  } else if (IsTextAreaElement(element)) {
785    // Nothing more to do in this case.
786  } else if (extract_mask & EXTRACT_OPTIONS) {
787    // Set option strings on the field if available.
788    DCHECK(IsSelectElement(element));
789    const WebSelectElement select_element = element.toConst<WebSelectElement>();
790    GetOptionStringsFromElement(select_element,
791                                &field->option_values,
792                                &field->option_contents);
793  }
794
795  if (!(extract_mask & EXTRACT_VALUE))
796    return;
797
798  base::string16 value = element.value();
799
800  if (IsSelectElement(element) && (extract_mask & EXTRACT_OPTION_TEXT)) {
801    const WebSelectElement select_element = element.toConst<WebSelectElement>();
802    // Convert the |select_element| value to text if requested.
803    WebVector<WebElement> list_items = select_element.listItems();
804    for (size_t i = 0; i < list_items.size(); ++i) {
805      if (IsOptionElement(list_items[i])) {
806        const WebOptionElement option_element =
807            list_items[i].toConst<WebOptionElement>();
808        if (option_element.value() == value) {
809          value = option_element.text();
810          break;
811        }
812      }
813    }
814  }
815
816  // Constrain the maximum data length to prevent a malicious site from DOS'ing
817  // the browser: http://crbug.com/49332
818  if (value.size() > kMaxDataLength)
819    value = value.substr(0, kMaxDataLength);
820
821  field->value = value;
822}
823
824bool WebFormElementToFormData(
825    const blink::WebFormElement& form_element,
826    const blink::WebFormControlElement& form_control_element,
827    RequirementsMask requirements,
828    ExtractMask extract_mask,
829    FormData* form,
830    FormFieldData* field) {
831  CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
832  CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
833  CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
834
835  const WebFrame* frame = form_element.document().frame();
836  if (!frame)
837    return false;
838
839  if (requirements & REQUIRE_AUTOCOMPLETE && !form_element.autoComplete())
840    return false;
841
842  form->name = GetFormIdentifier(form_element);
843  form->origin = frame->document().url();
844  form->action = frame->document().completeURL(form_element.action());
845  form->user_submitted = form_element.wasUserSubmitted();
846
847  // If the completed URL is not valid, just use the action we get from
848  // WebKit.
849  if (!form->action.is_valid())
850    form->action = GURL(form_element.action());
851
852  // A map from a FormFieldData's name to the FormFieldData itself.
853  std::map<base::string16, FormFieldData*> name_map;
854
855  // The extracted FormFields.  We use pointers so we can store them in
856  // |name_map|.
857  ScopedVector<FormFieldData> form_fields;
858
859  WebVector<WebFormControlElement> control_elements;
860  form_element.getFormControlElements(control_elements);
861
862  // A vector of bools that indicate whether each field in the form meets the
863  // requirements and thus will be in the resulting |form|.
864  std::vector<bool> fields_extracted(control_elements.size(), false);
865
866  for (size_t i = 0; i < control_elements.size(); ++i) {
867    const WebFormControlElement& control_element = control_elements[i];
868
869    if (!IsAutofillableElement(control_element))
870      continue;
871
872    const WebInputElement* input_element = toWebInputElement(&control_element);
873    if (requirements & REQUIRE_AUTOCOMPLETE &&
874        IsAutofillableInputElement(input_element) &&
875        !SatisfiesRequireAutocomplete(*input_element))
876      continue;
877
878    // Create a new FormFieldData, fill it out and map it to the field's name.
879    FormFieldData* form_field = new FormFieldData;
880    WebFormControlElementToFormField(control_element, extract_mask, form_field);
881    form_fields.push_back(form_field);
882    // TODO(jhawkins): A label element is mapped to a form control element's id.
883    // field->name() will contain the id only if the name does not exist.  Add
884    // an id() method to WebFormControlElement and use that here.
885    name_map[form_field->name] = form_field;
886    fields_extracted[i] = true;
887  }
888
889  // If we failed to extract any fields, give up.  Also, to avoid overly
890  // expensive computation, we impose a maximum number of allowable fields.
891  if (form_fields.empty() || form_fields.size() > kMaxParseableFields)
892    return false;
893
894  // Loop through the label elements inside the form element.  For each label
895  // element, get the corresponding form control element, use the form control
896  // element's name as a key into the <name, FormFieldData> map to find the
897  // previously created FormFieldData and set the FormFieldData's label to the
898  // label.firstChild().nodeValue() of the label element.
899  WebElementCollection labels = form_element.getElementsByHTMLTagName(kLabel);
900  DCHECK(!labels.isNull());
901  for (WebElement item = labels.firstItem(); !item.isNull();
902       item = labels.nextItem()) {
903    WebLabelElement label = item.to<WebLabelElement>();
904    WebFormControlElement field_element =
905        label.correspondingControl().to<WebFormControlElement>();
906
907    base::string16 element_name;
908    if (field_element.isNull()) {
909      // Sometimes site authors will incorrectly specify the corresponding
910      // field element's name rather than its id, so we compensate here.
911      element_name = label.getAttribute(kFor);
912    } else if (
913        !field_element.isFormControlElement() ||
914        field_element.formControlType() == kHidden) {
915      continue;
916    } else {
917      element_name = field_element.nameForAutofill();
918    }
919
920    std::map<base::string16, FormFieldData*>::iterator iter =
921        name_map.find(element_name);
922    if (iter != name_map.end()) {
923      base::string16 label_text = FindChildText(label);
924
925      // Concatenate labels because some sites might have multiple label
926      // candidates.
927      if (!iter->second->label.empty() && !label_text.empty())
928        iter->second->label += base::ASCIIToUTF16(" ");
929      iter->second->label += label_text;
930    }
931  }
932
933  // Loop through the form control elements, extracting the label text from
934  // the DOM.  We use the |fields_extracted| vector to make sure we assign the
935  // extracted label to the correct field, as it's possible |form_fields| will
936  // not contain all of the elements in |control_elements|.
937  for (size_t i = 0, field_idx = 0;
938       i < control_elements.size() && field_idx < form_fields.size(); ++i) {
939    // This field didn't meet the requirements, so don't try to find a label
940    // for it.
941    if (!fields_extracted[i])
942      continue;
943
944    const WebFormControlElement& control_element = control_elements[i];
945    if (form_fields[field_idx]->label.empty())
946      form_fields[field_idx]->label = InferLabelForElement(control_element);
947
948    if (field && form_control_element == control_element)
949      *field = *form_fields[field_idx];
950
951    ++field_idx;
952  }
953
954  // Copy the created FormFields into the resulting FormData object.
955  for (ScopedVector<FormFieldData>::const_iterator iter = form_fields.begin();
956       iter != form_fields.end(); ++iter) {
957    form->fields.push_back(**iter);
958  }
959
960  return true;
961}
962
963bool FindFormAndFieldForFormControlElement(const WebFormControlElement& element,
964                                           FormData* form,
965                                           FormFieldData* field,
966                                           RequirementsMask requirements) {
967  if (!IsAutofillableElement(element))
968    return false;
969
970  const WebFormElement form_element = element.form();
971  if (form_element.isNull())
972    return false;
973
974  ExtractMask extract_mask =
975      static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
976  return WebFormElementToFormData(form_element,
977                                  element,
978                                  requirements,
979                                  extract_mask,
980                                  form,
981                                  field);
982}
983
984void FillForm(const FormData& form, const WebFormControlElement& element) {
985  WebFormElement form_element = element.form();
986  if (form_element.isNull())
987    return;
988
989  ForEachMatchingFormField(form_element,
990                           element,
991                           form,
992                           true, /* only_focusable_elements */
993                           false, /* don't force override */
994                           &FillFormField);
995}
996
997void FillFormIncludingNonFocusableElements(const FormData& form_data,
998                                           const WebFormElement& form_element) {
999  if (form_element.isNull())
1000    return;
1001
1002  ForEachMatchingFormField(form_element,
1003                           WebInputElement(),
1004                           form_data,
1005                           false, /* only_focusable_elements */
1006                           true, /* force override */
1007                           &FillFormField);
1008}
1009
1010void PreviewForm(const FormData& form, const WebFormControlElement& element) {
1011  WebFormElement form_element = element.form();
1012  if (form_element.isNull())
1013    return;
1014
1015  ForEachMatchingFormField(form_element,
1016                           element,
1017                           form,
1018                           true, /* only_focusable_elements */
1019                           false, /* dont force override */
1020                           &PreviewFormField);
1021}
1022
1023bool ClearPreviewedFormWithElement(const WebFormControlElement& element,
1024                                   bool was_autofilled) {
1025  WebFormElement form_element = element.form();
1026  if (form_element.isNull())
1027    return false;
1028
1029  std::vector<WebFormControlElement> control_elements;
1030  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1031                              &control_elements);
1032  for (size_t i = 0; i < control_elements.size(); ++i) {
1033    // There might be unrelated elements in this form which have already been
1034    // auto-filled.  For example, the user might have already filled the address
1035    // part of a form and now be dealing with the credit card section.  We only
1036    // want to reset the auto-filled status for fields that were previewed.
1037    WebFormControlElement control_element = control_elements[i];
1038
1039    // Only text input, textarea and select elements can be previewed.
1040    WebInputElement* input_element = toWebInputElement(&control_element);
1041    if (!IsTextInput(input_element) &&
1042        !IsMonthInput(input_element) &&
1043        !IsTextAreaElement(control_element) &&
1044        !IsSelectElement(control_element))
1045      continue;
1046
1047    // If the element is not auto-filled, we did not preview it,
1048    // so there is nothing to reset.
1049    if(!control_element.isAutofilled())
1050      continue;
1051
1052    if ((IsTextInput(input_element) ||
1053         IsMonthInput(input_element) ||
1054         IsTextAreaElement(control_element) ||
1055         IsSelectElement(control_element)) &&
1056        control_element.suggestedValue().isEmpty())
1057      continue;
1058
1059    // Clear the suggested value. For the initiating node, also restore the
1060    // original value.
1061    if (IsTextInput(input_element) || IsMonthInput(input_element) ||
1062        IsTextAreaElement(control_element)) {
1063      control_element.setSuggestedValue(WebString());
1064      bool is_initiating_node = (element == control_element);
1065      if (is_initiating_node) {
1066        control_element.setAutofilled(was_autofilled);
1067        // Clearing the suggested value in the focused node (above) can cause
1068        // selection to be lost. We force selection range to restore the text
1069        // cursor.
1070        int length = control_element.value().length();
1071        control_element.setSelectionRange(length, length);
1072      } else {
1073        control_element.setAutofilled(false);
1074      }
1075    } else if (IsSelectElement(control_element)) {
1076      control_element.setSuggestedValue(WebString());
1077      control_element.setAutofilled(false);
1078    }
1079  }
1080
1081  return true;
1082}
1083
1084bool FormWithElementIsAutofilled(const WebInputElement& element) {
1085  WebFormElement form_element = element.form();
1086  if (form_element.isNull())
1087    return false;
1088
1089  std::vector<WebFormControlElement> control_elements;
1090  ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1091                              &control_elements);
1092  for (size_t i = 0; i < control_elements.size(); ++i) {
1093    WebInputElement* input_element = toWebInputElement(&control_elements[i]);
1094    if (!IsAutofillableInputElement(input_element))
1095      continue;
1096
1097    if (input_element->isAutofilled())
1098      return true;
1099  }
1100
1101  return false;
1102}
1103
1104bool IsWebpageEmpty(const blink::WebFrame* frame) {
1105  blink::WebDocument document = frame->document();
1106
1107  return IsWebElementEmpty(document.head()) &&
1108         IsWebElementEmpty(document.body());
1109}
1110
1111bool IsWebElementEmpty(const blink::WebElement& element) {
1112  // This array contains all tags which can be present in an empty page.
1113  const char* const kAllowedValue[] = {
1114    "script",
1115    "meta",
1116    "title",
1117  };
1118  const size_t kAllowedValueLength = arraysize(kAllowedValue);
1119
1120  if (element.isNull())
1121    return true;
1122  // The childNodes method is not a const method. Therefore it cannot be called
1123  // on a const reference. Therefore we need a const cast.
1124  const blink::WebNodeList& children =
1125      const_cast<blink::WebElement&>(element).childNodes();
1126  for (size_t i = 0; i < children.length(); ++i) {
1127    const blink::WebNode& item = children.item(i);
1128
1129    if (item.isTextNode() &&
1130        !base::ContainsOnlyChars(item.nodeValue().utf8(),
1131                                 base::kWhitespaceASCII))
1132      return false;
1133
1134    // We ignore all other items with names which begin with
1135    // the character # because they are not html tags.
1136    if (item.nodeName().utf8()[0] == '#')
1137      continue;
1138
1139    bool tag_is_allowed = false;
1140    // Test if the item name is in the kAllowedValue array
1141    for (size_t allowed_value_index = 0;
1142         allowed_value_index < kAllowedValueLength; ++allowed_value_index) {
1143      if (HasTagName(item,
1144                     WebString::fromUTF8(kAllowedValue[allowed_value_index]))) {
1145        tag_is_allowed = true;
1146        break;
1147      }
1148    }
1149    if (!tag_is_allowed)
1150      return false;
1151  }
1152  return true;
1153}
1154
1155gfx::RectF GetScaledBoundingBox(float scale, WebFormControlElement* element) {
1156  gfx::Rect bounding_box(element->boundsInViewportSpace());
1157  return gfx::RectF(bounding_box.x() * scale,
1158                    bounding_box.y() * scale,
1159                    bounding_box.width() * scale,
1160                    bounding_box.height() * scale);
1161}
1162
1163}  // namespace autofill
1164