1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
/**
 * Parse a very small subset of HTML.  This ensures that insecure HTML /
 * javascript cannot be injected into the new tab page.
 *
 * By default only <a>, <b> and <strong> elements are accepted, and the only
 * accepted attributes are a[href] (value must start with chrome:// or
 * https://) and a[target] (accepted, but its value is reset to "").  Text
 * nodes and comments are always accepted.  Anything else makes the call throw.
 *
 * @param {string} s The string to parse.
 * @param {Array.<string>=} opt_extraTags Optional extra allowed tags.  Names
 *     are upper-cased before being compared against element tag names.
 * @param {Object.<string, function(Node, string):boolean>=} opt_extraAttrs
 *     Optional extra allowed attributes (all tags are run through these).
 *     Each key is an attribute name and each value a validator that returns
 *     true when the attribute is acceptable on the given node.  An entry with
 *     the same name as a built-in attribute replaces the built-in validator.
 * @throws {Error} In case of non supported markup.
 * @return {DocumentFragment} A document fragment containing the DOM tree.
 */
var parseHtmlSubset = (function() {
  'use strict';

  // Borrowed rather than called on the dictionaries themselves, so that
  // prototype-less dictionaries and dictionaries that contain a key named
  // "hasOwnProperty" are handled correctly.
  var hasOwn = Object.prototype.hasOwnProperty;

  /**
   * Validators for the attributes that are allowed by default, keyed by
   * attribute name.  Each validator receives the owning element and the
   * attribute value and returns whether the attribute is acceptable.
   * @type {!Object.<string, function(Node, string):boolean>}
   * @const
   */
  var allowedAttributes = {
    'href': function(node, value) {
      // Only allow a[href] starting with chrome:// and https://
      return node.tagName === 'A' && (value.indexOf('chrome://') === 0 ||
          value.indexOf('https://') === 0);
    },
    'target': function(node, value) {
      // Allow a[target] but reset the value to "".
      if (node.tagName !== 'A')
        return false;
      node.setAttribute('target', '');
      return true;
    }
  };

  /**
   * Whitelist of tag names allowed in parseHtmlSubset.
   * @type {!Array.<string>}
   * @const
   */
  var allowedTags = ['A', 'B', 'STRONG'];

  /**
   * Shallow-merges the own enumerable properties of every object argument
   * into a new object.  Later arguments win over earlier ones; arguments that
   * are not objects (or are null) are skipped.  The result has no prototype,
   * so looking up a key never reaches Object.prototype.
   * @param {...Object} var_args Objects to merge.
   * @return {!Object} The merged, prototype-less dictionary.
   */
  function merge(var_args) {
    var merged = Object.create(null);
    for (var i = 0; i < arguments.length; ++i) {
      var source = arguments[i];
      if (typeof source !== 'object' || source === null)
        continue;
      for (var key in source) {
        if (hasOwn.call(source, key))
          merged[key] = source[key];
      }
    }
    return merged;
  }

  /**
   * Calls |f| on |n| and then, depth first, on each of its descendants.
   * @param {Node} n Root of the subtree to visit.
   * @param {function(Node)} f Callback invoked once per visited node.
   */
  function walk(n, f) {
    f(n);
    for (var i = 0; i < n.childNodes.length; i++) {
      walk(n.childNodes[i], f);
    }
  }

  /**
   * Throws unless the tag name of |node| is listed in |tags|.
   * @param {!Array.<string>} tags Allowed (upper-case) tag names.
   * @param {Node} node The element to check.
   * @throws {Error} If the element's tag is not allowed.
   */
  function assertElement(tags, node) {
    if (tags.indexOf(node.tagName) === -1)
      throw new Error(node.tagName + ' is not supported');
  }

  /**
   * Throws unless |attrs| has a validator registered under the attribute's
   * name and that validator accepts the attribute for |node|.
   * @param {!Object.<string, function(Node, string):boolean>} attrs Allowed
   *     attributes and their validators.
   * @param {Node} attrNode The attribute to check.
   * @param {Node} node The element that owns the attribute.
   * @throws {Error} If the attribute is not allowed on this element.
   */
  function assertAttribute(attrs, attrNode, node) {
    var n = attrNode.nodeName;
    var v = attrNode.nodeValue;
    if (!hasOwn.call(attrs, n) || !attrs[n](node, v)) {
      throw new Error(
          node.tagName + '[' + n + '="' + v + '"] is not supported');
    }
  }

  return function(s, opt_extraTags, opt_extraAttrs) {
    var extraTags =
        (opt_extraTags || []).map(function(str) { return str.toUpperCase(); });
    var tags = allowedTags.concat(extraTags);
    var attrs = merge(allowedAttributes, opt_extraAttrs || {});

    var doc = document.implementation.createHTMLDocument('');
    var r = doc.createRange();
    r.selectNode(doc.body);
    // This does not execute any scripts because the document has no view.
    var df = r.createContextualFragment(s);
    walk(df, function(node) {
      switch (node.nodeType) {
        case Node.ELEMENT_NODE:
          assertElement(tags, node);
          var nodeAttrs = node.attributes;
          for (var i = 0; i < nodeAttrs.length; ++i) {
            assertAttribute(attrs, nodeAttrs[i], node);
          }
          break;

        case Node.COMMENT_NODE:
        case Node.DOCUMENT_FRAGMENT_NODE:
        case Node.TEXT_NODE:
          break;

        default:
          throw new Error('Node type ' + node.nodeType + ' is not supported');
      }
    });
    return df;
  };
})();
106