HtmlPolicyBuilder.java revision 79b4c29af1261d95c663bdf0003b70cb0eb8000e
1// Copyright (c) 2011, Mike Samuel
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions
6// are met:
7//
8// Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// Redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution.
13// Neither the name of the OWASP nor the names of its contributors may
14// be used to endorse or promote products derived from this software
15// without specific prior written permission.
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28
29package org.owasp.html;
30
31import java.util.List;
32import java.util.Map;
33import java.util.Set;
34import java.util.regex.Pattern;
35
36import javax.annotation.Nullable;
37import javax.annotation.concurrent.NotThreadSafe;
38
39import com.google.common.base.Predicate;
40import com.google.common.collect.ImmutableList;
41import com.google.common.collect.ImmutableMap;
42import com.google.common.collect.ImmutableSet;
43import com.google.common.collect.Maps;
44import com.google.common.collect.Sets;
45
46
47/**
48 * Conveniences for configuring policies for the {@link HtmlSanitizer}.
49 *
50 * <h3>Usage</h3>
51 * <p>
52 * To create a policy, first construct an instance of this class; then call
53 * <code>allow&hellip;</code> methods to turn on tags, attributes, and other
54 * processing modes; and finally call <code>build(renderer)</code> or
55 * <code>toFactory()</code>.
56 * </p>
57 * <pre class="prettyprint lang-java">
58 * // Define the policy.
59 * Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy> policyDefinition
60 *     = new HtmlPolicyBuilder()
61 *         .allowElements("a", "p")
62 *         .allowAttributes("href").onElements("a")
63 *         .toFactory();
64 *
65 * // Sanitize your output.
 * HtmlSanitizer.sanitize(myHtml, policyDefinition.apply(myHtmlStreamRenderer));
67 * </pre>
68 *
69 * <h3>Embedded Content</h3>
70 * <p>
71 * Embedded URLs are filtered by
72 * {@link HtmlPolicyBuilder#allowUrlProtocols protocol}.
73 * There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy}
74 * so you can easily white-list widely used policies that don't violate the
 * current page's origin.  See "Customization" below for ways to do further
76 * filtering.  If you allow links it might be worthwhile to
77 * {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require}
78 * {@code rel=nofollow}.
79 * </p>
80 * <p>
81 * This class simply throws out all embedded JS.
82 * Use a custom element or attribute policy to allow through
83 * signed or otherwise known-safe code.
84 * Check out the Caja project if you need a way to contain third-party JS.
85 * </p>
86 * <p>
87 * This class does not attempt to faithfully parse and sanitize CSS.
88 * It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option
89 * that allows through a few CSS properties that allow textual styling, but that
90 * disallow image loading, history stealing, layout breaking, code execution,
91 * etc.
92 * </p>
93 *
94 * <h3>Customization</h3>
95 * <p>
96 * You can easily do custom processing on tags and attributes by supplying your
97 * own {@link ElementPolicy element policy} or
98 * {@link AttributePolicy attribute policy} when calling
99 * <code>allow&hellip;</code>.
100 * E.g. to convert headers into {@code <div>}s, you could use an element policy
101 * </p>
102 * <pre class="prettyprint lang-java">
 *     new HtmlPolicyBuilder()
 *         .allowElements(
105 *         new ElementPolicy() {
106 *           public String apply(String elementName, List<String> attributes) {
107 *             attributes.add("class");
108 *             attributes.add("header-" + elementName);
109 *             return "div";
110 *           }
111 *         },
112 *         "h1", "h2", "h3", "h4", "h5", "h6")
113 *         .build(outputChannel)
114 * </pre>
115 *
116 * <h3>Rules of Thumb</h3>
117 * <p>
118 * Throughout this class, several rules hold:
119 * <ul>
120 *   <li>Everything is denied by default.  There are
121 *     <code>disallow&hellip;</code> methods, but those reverse
122 *     allows instead of rolling back overly permissive defaults.
123 *   <li>The order of allows and disallows does not matter.
124 *     Disallows trump allows whether they occur before or after them.
125 *     The only method that needs to be called in a particular place is
126 *     {@link HtmlPolicyBuilder#build}.
127 *     Allows or disallows after {@code build} is called have no
128 *     effect on the already built policy.
129 *   <li>Element and attribute policies are applied in the following order:
130 *     element specific attribute policy, global attribute policy, element
131 *     policy.
132 *     Element policies come last so they can observe all the post-processed
133 *     attributes, and so they can add attributes that are exempt from
134 *     attribute policies.
135 *     Element specific policies go first, so they can normalize content to
136 *     a form that might be acceptable to a more simplistic global policy.
137 * </ul>
138 *
139 * <h3>Thread safety and efficiency</h3>
140 * <p>
141 * This class is not thread-safe.  The resulting policy will not violate its
142 * security guarantees as a result of race conditions, but is not thread safe
143 * because it maintains state to track whether text inside disallowed elements
144 * should be suppressed.
145 * <p>
146 * The resulting policy can be reused, but if you use the
147 * {@link HtmlPolicyBuilder#toFactory()} method instead of {@link #build}, then
148 * binding policies to output channels is cheap so there's no need.
149 * </p>
150 *
151 * @author Mike Samuel <mikesamuel@gmail.com>
152 */
153@TCB
154@NotThreadSafe
155public class HtmlPolicyBuilder {
156  /**
157   * The default set of elements that are removed if they have no attributes.
158   * Since {@code <img>} is in this set, by default, a policy will remove
159   * {@code <img src=javascript:alert(1337)>} because its URL is not allowed
160   * and it has no other attributes that would warrant it appearing in the
161   * output.
162   */
163  public static final ImmutableSet<String> DEFAULT_SKIP_IF_EMPTY
164      = ImmutableSet.of("a", "font", "img", "input", "span");
165
166  private final Map<String, ElementPolicy> elPolicies = Maps.newLinkedHashMap();
167  private final Map<String, Map<String, AttributePolicy>> attrPolicies
168      = Maps.newLinkedHashMap();
169  private final Map<String, AttributePolicy> globalAttrPolicies
170      = Maps.newLinkedHashMap();
171  private final Set<String> allowedProtocols = Sets.newLinkedHashSet();
172  private final Set<String> skipIfEmpty = Sets.newLinkedHashSet(
173      DEFAULT_SKIP_IF_EMPTY);
174  private final Map<String, Boolean> textContainers = Maps.newLinkedHashMap();
175  private boolean requireRelNofollowOnLinks, allowStyling;
176
177  /**
178   * Allows the named elements.
179   */
180  public HtmlPolicyBuilder allowElements(String... elementNames) {
181    return allowElements(ElementPolicy.IDENTITY_ELEMENT_POLICY, elementNames);
182  }
183
184  /**
185   * Disallows the named elements.  Elements are disallowed by default, so
186   * there is no need to disallow elements, unless you are making an exception
187   * based on an earlier allow.
188   */
189  public HtmlPolicyBuilder disallowElements(String... elementNames) {
190    return allowElements(ElementPolicy.REJECT_ALL_ELEMENT_POLICY, elementNames);
191  }
192
193  /**
194   * Allow the given elements with the given policy.
195   *
196   * @param policy May remove or add attributes, change the element name, or
197   *    deny the element.
198   */
199  public HtmlPolicyBuilder allowElements(
200      ElementPolicy policy, String... elementNames) {
201    invalidateCompiledState();
202    for (String elementName : elementNames) {
203      elementName = HtmlLexer.canonicalName(elementName);
204      ElementPolicy newPolicy = ElementPolicy.Util.join(
205          elPolicies.get(elementName), policy);
206      // Don't remove if newPolicy is the always reject policy since we want
207      // that to infect later allowElement calls for this particular element
208      // name.  rejects should have higher priority than allows.
209      elPolicies.put(elementName, newPolicy);
210      if (!textContainers.containsKey(elementName)
211          && TagBalancingHtmlStreamEventReceiver
212              .allowsPlainTextualContent(elementName)) {
213        textContainers.put(elementName, true);
214      }
215    }
216    return this;
217  }
218
219  /**
220   * A canned policy that allows a number of common formatting elements.
221   */
222  public HtmlPolicyBuilder allowCommonInlineFormattingElements() {
223    return allowElements(
224        "b", "i", "font", "s", "u", "o", "sup", "sub", "ins", "del", "strong",
225        "strike", "tt", "code", "big", "small", "br", "span");
226  }
227
228  /**
229   * A canned policy that allows a number of common block elements.
230   */
231  public HtmlPolicyBuilder allowCommonBlockElements() {
232    return allowElements(
233        "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "li",
234        "blockquote");
235  }
236
237  /**
238   * Allows text content in the named elements.
239   * By default, text content is allowed in any
240   * {@link #allowElements allowed elements} that can contain character data per
241   * the HTML5 spec, but text content is not allowed by default in elements that
242   * contain content of other kinds (like JavaScript in {@code <script>}
243   * elements.
244   * <p>
245   * To write a policy that whitelists {@code <script>} or {@code <style>}
246   * elements, first {@code allowTextIn("script")}.
247   */
248  public HtmlPolicyBuilder allowTextIn(String... elementNames) {
249    invalidateCompiledState();
250    for (String elementName : elementNames) {
251      elementName = HtmlLexer.canonicalName(elementName);
252      textContainers.put(elementName, true);
253    }
254    return this;
255  }
256
257  public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
258    invalidateCompiledState();
259    for (String elementName : elementNames) {
260      elementName = HtmlLexer.canonicalName(elementName);
261      textContainers.put(elementName, false);
262    }
263    return this;
264  }
265
266  /**
267   * Assuming the given elements are allowed, allows them to appear without
268   * attributes.
269   *
270   * @see #DEFAULT_SKIP_IF_EMPTY
271   * @see #disallowWithoutAttributes
272   */
273  public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
274    invalidateCompiledState();
275    for (String elementName : elementNames) {
276      elementName = HtmlLexer.canonicalName(elementName);
277      skipIfEmpty.remove(elementName);
278    }
279    return this;
280  }
281
282  /**
283   * Disallows the given elements from appearing without attributes.
284   *
285   * @see #DEFAULT_SKIP_IF_EMPTY
286   * @see #allowWithoutAttributes
287   */
288  public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
289    invalidateCompiledState();
290    for (String elementName : elementNames) {
291      elementName = HtmlLexer.canonicalName(elementName);
292      skipIfEmpty.add(elementName);
293    }
294    return this;
295  }
296
297  /**
298   * Returns an object that lets you associate policies with the given
299   * attributes, and allow them globally or on specific elements.
300   */
301  public AttributeBuilder allowAttributes(String... attributeNames) {
302    ImmutableList.Builder<String> b = ImmutableList.builder();
303    for (String attributeName : attributeNames) {
304      b.add(HtmlLexer.canonicalName(attributeName));
305    }
306    return new AttributeBuilder(b.build());
307  }
308
309  /**
310   * Reverse an earlier attribute {@link #allowAttributes allow}.
311   * <p>
312   * For this to have an effect you must call at least one of
313   * {@link AttributeBuilder#globally} and {@link AttributeBuilder#onElements}.
314   * <p>
315   * Attributes are disallowed by default, so there is no need to call this
316   * with a laundry list of attribute/element pairs.
317   */
318  public AttributeBuilder disallowAttributes(String... attributeNames) {
319    return this.allowAttributes(attributeNames)
320        .matching(AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY);
321  }
322
323
324  private HtmlPolicyBuilder allowAttributesGlobally(
325      AttributePolicy policy, List<String> attributeNames) {
326    invalidateCompiledState();
327    for (String attributeName : attributeNames) {
328      // We reinterpret the identity policy later via policy joining since its
329      // the default passed from the policy-less method, but we don't do
330      // anything here since we don't know until build() is called whether the
331      // policy author wants to allow certain URL protocols or wants to deal
332      // with styles.
333      AttributePolicy oldPolicy = globalAttrPolicies.get(attributeName);
334      globalAttrPolicies.put(
335          attributeName, AttributePolicy.Util.join(oldPolicy, policy));
336    }
337    return this;
338  }
339
340  private HtmlPolicyBuilder allowAttributesOnElements(
341      AttributePolicy policy, List<String> attributeNames,
342      List<String> elementNames) {
343    invalidateCompiledState();
344    for (String elementName : elementNames) {
345      Map<String, AttributePolicy> policies = attrPolicies.get(elementName);
346      if (policies == null) {
347        policies = Maps.newLinkedHashMap();
348        attrPolicies.put(elementName, policies);
349      }
350      for (String attributeName : attributeNames) {
351        AttributePolicy oldPolicy = policies.get(attributeName);
352        policies.put(
353            attributeName,
354            AttributePolicy.Util.join(oldPolicy, policy));
355      }
356    }
357    return this;
358  }
359
360  /**
361   * Adds <a href="http://en.wikipedia.org/wiki/Nofollow"><code>rel=nofollow</code></a>
362   * to links.
363   */
364  public HtmlPolicyBuilder requireRelNofollowOnLinks() {
365    invalidateCompiledState();
366    this.requireRelNofollowOnLinks = true;
367    return this;
368  }
369
370  /**
371   * Adds to the set of protocols that are allowed in URL attributes.
372   * For each URL attribute that is allowed, we further constrain it by
373   * only allowing the value through if it specifies no protocol, or if it
374   * specifies one in the allowedProtocols white-list.
375   * This is done regardless of whether any protocols have been allowed, so
376   * allowing the attribute "href" globally with the identity policy but
377   * not white-listing any protocols, effectively disallows the "href"
378   * attribute globally.
379   * <p>
380   * Do not allow any <code>*script</code> such as <code>javascript</code>
381   * protocols if you might use this policy with untrusted code.
382   */
383  public HtmlPolicyBuilder allowUrlProtocols(String... protocols) {
384    invalidateCompiledState();
385    // If there is at least one allowed protocol, then allow URLs and
386    // add a filter that checks href and src values.
387
388    // Do not allow href and srcs through otherwise, and only allow on images
389    // and links.
390    for (String protocol : protocols) {
391      protocol = Strings.toLowerCase(protocol);
392      allowedProtocols.add(protocol);
393    }
394    return this;
395  }
396
397  /**
398   * Reverses a decision made by {@link #allowUrlProtocols}.
399   */
400  public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) {
401    invalidateCompiledState();
402    for (String protocol : protocols) {
403      protocol = Strings.toLowerCase(protocol);
404      allowedProtocols.remove(protocol);
405    }
406    return this;
407  }
408
409  /**
410   * A canned URL protocol policy that allows <code>http</code>,
411   * <code>https</code>, and <code>mailto</code>.
412   */
413  public HtmlPolicyBuilder allowStandardUrlProtocols() {
414    return allowUrlProtocols("http", "https", "mailto");
415  }
416
417  /**
418   * Convert <code>style="&lt;CSS&gt;"</code> to simple non-JS containing
419   * <code>&lt;font&gt;</code> tags to allow color, font-size, typeface, and
420   * other styling.
421   */
422  public HtmlPolicyBuilder allowStyling() {
423    invalidateCompiledState();
424    allowStyling = true;
425    return this;
426  }
427
428  /**
429   * Names of attributes from HTML 4 whose values are URLs.
430   * Other attributes, e.g. <code>style</code> may contain URLs even though
431   * there values are not URLs.
432   */
433  private static final Set<String> URL_ATTRIBUTE_NAMES = ImmutableSet.of(
434      "action", "archive", "background", "cite", "classid", "codebase", "data",
435      "dsync", "formaction", "href", "icon", "longdesc", "manifest", "poster",
436      "profile", "src", "usemap");
437
438  /**
439   * Produces a policy based on the allow and disallow calls previously made.
440   *
441   * @param out receives calls to open only tags allowed by
442   *      previous calls to this object.
443   *      Typically a {@link HtmlStreamRenderer}.
444   */
445  public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) {
446    return toFactory().apply(out);
447  }
448
449  /**
450   * Produces a policy based on the allow and disallow calls previously made.
451   *
452   * @param out receives calls to open only tags allowed by
453   *      previous calls to this object.
454   *      Typically a {@link HtmlStreamRenderer}.
455   * @param listener is notified of dropped tags and attributes so that
456   *      intrusion detection systems can be alerted to questionable HTML.
457   *      If {@code null} then no notifications are sent.
458   * @param context if {@code (listener != null)} then the context value passed
459   *      with alerts.  This can be used to let the listener know from which
460   *      connection or request the questionable HTML was received.
461   */
462  public <CTX> HtmlSanitizer.Policy build(
463      HtmlStreamEventReceiver out,
464      @Nullable HtmlChangeListener<? super CTX> listener,
465      @Nullable CTX context) {
466    return toFactory().apply(out, listener, context);
467  }
468
469  /**
470   * Like {@link #build} but can be reused to create many different policies
471   * each backed by a different output channel.
472   */
473  public PolicyFactory toFactory() {
474    ImmutableSet.Builder<String> textContainers = ImmutableSet.builder();
475    for (Map.Entry<String, Boolean> textContainer
476         : this.textContainers.entrySet()) {
477      if (Boolean.TRUE.equals(textContainer.getValue())) {
478        textContainers.add(textContainer.getKey());
479  }
480    }
481    return new PolicyFactory(
482        compilePolicies(), textContainers.build(), allowStyling);
483  }
484
485  // Speed up subsequent builds by caching the compiled policies.
486  private transient ImmutableMap<String, ElementAndAttributePolicies>
487      compiledPolicies;
488
489  /** Called by mutators to signal that any compiled policy is out-of-date. */
490  private void invalidateCompiledState() {
491    compiledPolicies = null;
492  }
493
494  private ImmutableMap<String, ElementAndAttributePolicies> compilePolicies() {
495    if (compiledPolicies != null) { return compiledPolicies; }
496
497    // Copy maps before normalizing in case builder is reused.
498    Map<String, ElementPolicy> elPolicies
499        = Maps.newLinkedHashMap(this.elPolicies);
500    Map<String, Map<String, AttributePolicy>> attrPolicies
501        = Maps.newLinkedHashMap(this.attrPolicies);
502    for (Map.Entry<String, Map<String, AttributePolicy>> e :
503         attrPolicies.entrySet()) {
504      e.setValue(Maps.newLinkedHashMap(e.getValue()));
505    }
506    Map<String, AttributePolicy> globalAttrPolicies
507        = Maps.newLinkedHashMap(this.globalAttrPolicies);
508    Set<String> allowedProtocols = ImmutableSet.copyOf(this.allowedProtocols);
509
510    // Implement requireRelNofollowOnLinks
511    if (requireRelNofollowOnLinks) {
512      elPolicies.put(
513          "a",
514          ElementPolicy.Util.join(
515              elPolicies.get("a"),
516              new ElementPolicy() {
517                public String apply(String elementName, List<String> attrs) {
518                  for (int i = 0, n = attrs.size(); i < n; i += 2) {
519                    if ("href".equals(attrs.get(i))) {
520                      attrs.add("rel");
521                      attrs.add("nofollow");
522                      break;
523                    }
524                  }
525                  return elementName;
526                }
527              }));
528    }
529
530    // Implement protocol policies.
531    // For each URL attribute that is allowed, we further constrain it by
532    // only allowing the value through if it specifies no protocol, or if it
533    // specifies one in the allowedProtocols white-list.
534    // This is done regardless of whether any protocols have been allowed, so
535    // allowing the attribute "href" globally with the identity policy but
536    // not white-listing any protocols, effectively disallows the "href"
537    // attribute globally.
538    {
539      AttributePolicy urlAttributePolicy;
540      if (allowedProtocols.size() == 3
541          && allowedProtocols.contains("mailto")
542          && allowedProtocols.contains("http")
543          && allowedProtocols.contains("https")) {
544        urlAttributePolicy = StandardUrlAttributePolicy.INSTANCE;
545      } else {
546        urlAttributePolicy = new FilterUrlByProtocolAttributePolicy(
547            allowedProtocols);
548      }
549      Set<String> toGuard = Sets.newLinkedHashSet(URL_ATTRIBUTE_NAMES);
550      for (String urlAttributeName : URL_ATTRIBUTE_NAMES) {
551        if (globalAttrPolicies.containsKey(urlAttributeName)) {
552          toGuard.remove(urlAttributeName);
553          globalAttrPolicies.put(urlAttributeName, AttributePolicy.Util.join(
554              urlAttributePolicy, globalAttrPolicies.get(urlAttributeName)));
555        }
556      }
557      // Implement guards not implemented on global policies in the per-element
558      // policy maps.
559      for (Map.Entry<String, Map<String, AttributePolicy>> e
560           : attrPolicies.entrySet()) {
561        Map<String, AttributePolicy> policies = e.getValue();
562        for (String urlAttributeName : toGuard) {
563          if (policies.containsKey(urlAttributeName)) {
564            policies.put(urlAttributeName, AttributePolicy.Util.join(
565                urlAttributePolicy, policies.get(urlAttributeName)));
566          }
567        }
568      }
569    }
570
571    ImmutableMap.Builder<String, ElementAndAttributePolicies> policiesBuilder
572        = ImmutableMap.builder();
573    for (Map.Entry<String, ElementPolicy> e : elPolicies.entrySet()) {
574      String elementName = e.getKey();
575      ElementPolicy elPolicy = e.getValue();
576      if (ElementPolicy.REJECT_ALL_ELEMENT_POLICY.equals(elPolicy)) {
577        continue;
578      }
579
580      Map<String, AttributePolicy> elAttrPolicies
581          = attrPolicies.get(elementName);
582      if (elAttrPolicies == null) { elAttrPolicies = ImmutableMap.of(); }
583      ImmutableMap.Builder<String, AttributePolicy> attrs
584          = ImmutableMap.builder();
585      for (Map.Entry<String, AttributePolicy> ape : elAttrPolicies.entrySet()) {
586        String attributeName = ape.getKey();
587        if (globalAttrPolicies.containsKey(attributeName)) { continue; }
588        AttributePolicy policy = ape.getValue();
589        if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
590          attrs.put(attributeName, policy);
591        }
592      }
593      for (Map.Entry<String, AttributePolicy> ape
594           : globalAttrPolicies.entrySet()) {
595        String attributeName = ape.getKey();
596        AttributePolicy policy = AttributePolicy.Util.join(
597            elAttrPolicies.get(attributeName), ape.getValue());
598        if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
599          attrs.put(attributeName, policy);
600        }
601      }
602
603      policiesBuilder.put(
604          elementName,
605          new ElementAndAttributePolicies(
606              elementName,
607              elPolicy, attrs.build(), skipIfEmpty.contains(elementName)));
608    }
609    return compiledPolicies = policiesBuilder.build();
610  }
611
612  /**
613   * Builds the relationship between attributes, the values that they may have,
614   * and the elements on which they may appear.
615   *
616   * @author Mike Samuel
617   */
618  public final class AttributeBuilder {
619    private final List<String> attributeNames;
620    private AttributePolicy policy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY;
621
622    AttributeBuilder(List<? extends String> attributeNames) {
623      this.attributeNames = ImmutableList.copyOf(attributeNames);
624    }
625
626    /**
627     * Filters and/or transforms the attribute values
628     * allowed by later {@code allow*} calls.
629     * Multiple calls to {@code matching} are combined so that the policies
630     * receive the value in order, each seeing the value after any
631     * transformation by a previous policy.
632     */
633    public AttributeBuilder matching(AttributePolicy policy) {
634      this.policy = AttributePolicy.Util.join(this.policy, policy);
635      return this;
636    }
637
638    /**
639     * Restrict the values allowed by later {@code allow*} calls to those
640     * matching the pattern.
641     * Multiple calls to {@code matching} are combined to restrict to the
642     * intersection of possible matched values.
643     */
644    public AttributeBuilder matching(final Pattern pattern) {
645      return matching(new AttributePolicy() {
646        public @Nullable String apply(
647            String elementName, String attributeName, String value) {
648          return pattern.matcher(value).matches() ? value : null;
649        }
650      });
651    }
652
653    /**
654     * Restrict the values allowed by later {@code allow*} calls to those
655     * matching the given predicate.
656     * Multiple calls to {@code matching} are combined to restrict to the
657     * intersection of possible matched values.
658     */
659    public AttributeBuilder matching(
660        final Predicate<? super String> filter) {
661      return matching(new AttributePolicy() {
662        public @Nullable String apply(
663            String elementName, String attributeName, String value) {
664          return filter.apply(value) ? value : null;
665        }
666      });
667    }
668
669    /**
670     * Restrict the values allowed by later {@code allow*} calls to those
671     * supplied.
672     * Multiple calls to {@code matching} are combined to restrict to the
673     * intersection of possible matched values.
674     */
675    public AttributeBuilder matching(
676        boolean ignoreCase, String... allowedValues) {
677      return matching(ignoreCase, ImmutableSet.copyOf(allowedValues));
678    }
679
680    /**
681     * Restrict the values allowed by later {@code allow*} calls to those
682     * supplied.
683     * Multiple calls to {@code matching} are combined to restrict to the
684     * intersection of possible matched values.
685     */
686    public AttributeBuilder matching(
687        final boolean ignoreCase, Set<? extends String> allowedValues) {
688      final ImmutableSet<String> allowed = ImmutableSet.copyOf(allowedValues);
689      return matching(new AttributePolicy() {
690        public @Nullable String apply(
691            String elementName, String attributeName, String value) {
692          if (ignoreCase) { value = Strings.toLowerCase(value); }
693          return allowed.contains(value) ? value : null;
694        }
695      });
696    }
697
698    /**
699     * Allows the given attributes on any elements but filters the
700     * attributes' values based on previous calls to {@code matching(...)}.
701     * Global attribute policies are applied after element specific policies.
702     * Be careful of using this with attributes like <code>type</code> which
703     * have different meanings on different attributes.
704     * Also be careful of allowing globally attributes like <code>href</code>
705     * which can have more far-reaching effects on tags like
706     * <code>&lt;base&gt;</code> and <code>&lt;link&gt;</code> than on
707     * <code>&lt;a&gt;</code> because in the former, they have an effect without
708     * user interaction and can change the behavior of the current page.
709     */
710    public HtmlPolicyBuilder globally() {
711      return HtmlPolicyBuilder.this.allowAttributesGlobally(
712          policy, attributeNames);
713    }
714
715    /**
716     * Allows the named attributes on the given elements but filters the
717     * attributes' values based on previous calls to {@code matching(...)}.
718     */
719    public HtmlPolicyBuilder onElements(String... elementNames) {
720      ImmutableList.Builder<String> b = ImmutableList.builder();
721      for (String elementName : elementNames) {
722        b.add(HtmlLexer.canonicalName(elementName));
723      }
724      return HtmlPolicyBuilder.this.allowAttributesOnElements(
725          policy, attributeNames, b.build());
726    }
727  }
728}
729