001    // Copyright (c) 2011, Mike Samuel
002    // All rights reserved.
003    //
004    // Redistribution and use in source and binary forms, with or without
005    // modification, are permitted provided that the following conditions
006    // are met:
007    //
008    // Redistributions of source code must retain the above copyright
009    // notice, this list of conditions and the following disclaimer.
010    // Redistributions in binary form must reproduce the above copyright
011    // notice, this list of conditions and the following disclaimer in the
012    // documentation and/or other materials provided with the distribution.
013    // Neither the name of the OWASP nor the names of its contributors may
014    // be used to endorse or promote products derived from this software
015    // without specific prior written permission.
016    // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
017    // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
018    // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
019    // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
020    // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
021    // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
022    // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023    // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
024    // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
025    // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
026    // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027    // POSSIBILITY OF SUCH DAMAGE.
028    
029    package org.owasp.html;
030    
031    import java.util.List;
032    import java.util.Map;
033    import java.util.Set;
034    import java.util.regex.Pattern;
035    
036    import javax.annotation.Nullable;
037    import javax.annotation.concurrent.NotThreadSafe;
038    
039    import com.google.common.base.Predicate;
040    import com.google.common.collect.ImmutableList;
041    import com.google.common.collect.ImmutableMap;
042    import com.google.common.collect.ImmutableSet;
043    import com.google.common.collect.Maps;
044    import com.google.common.collect.Sets;
045    
046    
047    /**
048     * Conveniences for configuring policies for the {@link HtmlSanitizer}.
049     *
050     * <h3>Usage</h3>
051     * <p>
052     * To create a policy, first construct an instance of this class; then call
053     * <code>allow&hellip;</code> methods to turn on tags, attributes, and other
054     * processing modes; and finally call <code>build(renderer)</code> or
055     * <code>toFactory()</code>.
056     * </p>
057     * <pre class="prettyprint lang-java">
058     * // Define the policy.
059     * Function&lt;HtmlStreamEventReceiver, HtmlSanitizer.Policy&gt; policy
060     *     = new HtmlPolicyBuilder()
061     *         .allowElements("a", "p")
062     *         .allowAttributes("href").onElements("a")
063     *         .toFactory();
064     *
065     * // Sanitize your output.
066     * HtmlSanitizer.sanitize(myHtml, policy.apply(myHtmlStreamRenderer));
067     * </pre>
068     *
069     * <h3>Embedded Content</h3>
070     * <p>
071     * Embedded URLs are filtered by
072     * {@link HtmlPolicyBuilder#allowUrlProtocols protocol}.
073     * There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy}
074     * so you can easily white-list widely used protocols that don't violate the
075     * current page's origin.  See "Customization" below for ways to do further
076     * filtering.  If you allow links it might be worthwhile to
077     * {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require}
078     * {@code rel=nofollow}.
079     * </p>
080     * <p>
081     * This class simply throws out all embedded JS.
082     * Use a custom element or attribute policy to allow through
083     * signed or otherwise known-safe code.
084     * Check out the Caja project if you need a way to contain third-party JS.
085     * </p>
086     * <p>
087     * This class does not attempt to faithfully parse and sanitize CSS.
088     * It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option
089     * that allows through a few CSS properties that allow textual styling, but that
090     * disallow image loading, history stealing, layout breaking, code execution,
091     * etc.
092     * </p>
093     *
094     * <h3>Customization</h3>
095     * <p>
096     * You can easily do custom processing on tags and attributes by supplying your
097     * own {@link ElementPolicy element policy} or
098     * {@link AttributePolicy attribute policy} when calling
099     * <code>allow&hellip;</code>.
100     * E.g. to convert headers into {@code <div>}s, you could use an element policy
101     * </p>
102     * <pre class="prettyprint lang-java">
103     * new HtmlPolicyBuilder()
104     *   .allowElements(
105     *     new ElementPolicy() {
106     *       public String apply(String elementName, List&lt;String> attributes) {
107     *         attributes.add("class");
108     *         attributes.add("header-" + elementName);
109     *         return "div";
110     *       }
111     *     },
112     *     "h1", "h2", "h3", "h4", "h5", "h6")
113     *   .build(outputChannel)
114     * </pre>
115     *
116     * <h3>Rules of Thumb</h3>
117     * <p>
118     * Throughout this class, several rules hold:
119     * <ul>
120     *   <li>Everything is denied by default.  There are
121     *     <code>disallow&hellip;</code> methods, but those reverse
122     *     allows instead of rolling back overly permissive defaults.
123     *   <li>The order of allows and disallows does not matter.
124     *     Disallows trump allows whether they occur before or after them.
125     *     The only method that needs to be called in a particular place is
126     *     {@link HtmlPolicyBuilder#build}.
127     *     Allows or disallows after {@code build} is called have no
128     *     effect on the already built policy.
129     *   <li>Element and attribute policies are applied in the following order:
130     *     element specific attribute policy, global attribute policy, element
131     *     policy.
132     *     Element policies come last so they can observe all the post-processed
133     *     attributes, and so they can add attributes that are exempt from
134     *     attribute policies.
135     *     Element specific policies go first, so they can normalize content to
136     *     a form that might be acceptable to a more simplistic global policy.
137     * </ul>
138     *
139     * <h3>Thread safety and efficiency</h3>
140     * <p>
141     * This class is not thread-safe.  The resulting policy will not violate its
142     * security guarantees as a result of race conditions, but is not thread safe
143     * because it maintains state to track whether text inside disallowed elements
144     * should be suppressed.
145     * <p>
146     * The resulting policy can be reused, but if you use the
147     * {@link HtmlPolicyBuilder#toFactory()} method instead of {@link #build}, then
148     * binding policies to output channels is cheap so there's no need.
149     * </p>
150     *
151     * @author Mike Samuel <mikesamuel@gmail.com>
152     */
153    @TCB
154    @NotThreadSafe
155    public class HtmlPolicyBuilder {
156      /**
157       * The default set of elements that are removed if they have no attributes.
158       * Since {@code <img>} is in this set, by default, a policy will remove
159       * {@code <img src=javascript:alert(1337)>} because its URL is not allowed
160       * and it has no other attributes that would warrant it appearing in the
161       * output.
162       */
163      public static final ImmutableSet<String> DEFAULT_SKIP_IF_EMPTY
164          = ImmutableSet.of("a", "font", "img", "input", "span");
165    
166      private final Map<String, ElementPolicy> elPolicies = Maps.newLinkedHashMap();
167      private final Map<String, Map<String, AttributePolicy>> attrPolicies
168          = Maps.newLinkedHashMap();
169      private final Map<String, AttributePolicy> globalAttrPolicies
170          = Maps.newLinkedHashMap();
171      private final Set<String> allowedProtocols = Sets.newLinkedHashSet();
172      private final Set<String> skipIfEmpty = Sets.newLinkedHashSet(
173          DEFAULT_SKIP_IF_EMPTY);
174      private final Map<String, Boolean> textContainers = Maps.newLinkedHashMap();
175      private boolean requireRelNofollowOnLinks;
176    
177      /**
178       * Allows the named elements.
179       */
180      public HtmlPolicyBuilder allowElements(String... elementNames) {
181        return allowElements(ElementPolicy.IDENTITY_ELEMENT_POLICY, elementNames);
182      }
183    
184      /**
185       * Disallows the named elements.  Elements are disallowed by default, so
186       * there is no need to disallow elements, unless you are making an exception
187       * based on an earlier allow.
188       */
189      public HtmlPolicyBuilder disallowElements(String... elementNames) {
190        return allowElements(ElementPolicy.REJECT_ALL_ELEMENT_POLICY, elementNames);
191      }
192    
193      /**
194       * Allow the given elements with the given policy.
195       *
196       * @param policy May remove or add attributes, change the element name, or
197       *    deny the element.
198       */
199      public HtmlPolicyBuilder allowElements(
200          ElementPolicy policy, String... elementNames) {
201        invalidateCompiledState();
202        for (String elementName : elementNames) {
203          elementName = HtmlLexer.canonicalName(elementName);
204          ElementPolicy newPolicy = ElementPolicy.Util.join(
205              elPolicies.get(elementName), policy);
206          // Don't remove if newPolicy is the always reject policy since we want
207          // that to infect later allowElement calls for this particular element
208          // name.  rejects should have higher priority than allows.
209          elPolicies.put(elementName, newPolicy);
210          if (!textContainers.containsKey(elementName)
211              && TagBalancingHtmlStreamEventReceiver
212                  .allowsPlainTextualContent(elementName)) {
213            textContainers.put(elementName, true);
214          }
215        }
216        return this;
217      }
218    
219      /**
220       * A canned policy that allows a number of common formatting elements.
221       */
222      public HtmlPolicyBuilder allowCommonInlineFormattingElements() {
223        return allowElements(
224            "b", "i", "font", "s", "u", "o", "sup", "sub", "ins", "del", "strong",
225            "strike", "tt", "code", "big", "small", "br", "span");
226      }
227    
228      /**
229       * A canned policy that allows a number of common block elements.
230       */
231      public HtmlPolicyBuilder allowCommonBlockElements() {
232        return allowElements(
233            "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "li",
234            "blockquote");
235      }
236    
237      /**
238       * Allows text content in the named elements.
239       * By default, text content is allowed in any
240       * {@link #allowElements allowed elements} that can contain character data per
241       * the HTML5 spec, but text content is not allowed by default in elements that
242       * contain content of other kinds (like JavaScript in {@code <script>}
243       * elements.
244       * <p>
245       * To write a policy that whitelists {@code <script>} or {@code <style>}
246       * elements, first {@code allowTextIn("script")}.
247       */
248      public HtmlPolicyBuilder allowTextIn(String... elementNames) {
249        invalidateCompiledState();
250        for (String elementName : elementNames) {
251          elementName = HtmlLexer.canonicalName(elementName);
252          textContainers.put(elementName, true);
253        }
254        return this;
255      }
256    
257      public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
258        invalidateCompiledState();
259        for (String elementName : elementNames) {
260          elementName = HtmlLexer.canonicalName(elementName);
261          textContainers.put(elementName, false);
262        }
263        return this;
264      }
265    
266      /**
267       * Assuming the given elements are allowed, allows them to appear without
268       * attributes.
269       *
270       * @see #DEFAULT_SKIP_IF_EMPTY
271       * @see #disallowWithoutAttributes
272       */
273      public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
274        invalidateCompiledState();
275        for (String elementName : elementNames) {
276          elementName = HtmlLexer.canonicalName(elementName);
277          skipIfEmpty.remove(elementName);
278        }
279        return this;
280      }
281    
282      /**
283       * Disallows the given elements from appearing without attributes.
284       *
285       * @see #DEFAULT_SKIP_IF_EMPTY
286       * @see #allowWithoutAttributes
287       */
288      public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
289        invalidateCompiledState();
290        for (String elementName : elementNames) {
291          elementName = HtmlLexer.canonicalName(elementName);
292          skipIfEmpty.add(elementName);
293        }
294        return this;
295      }
296    
297      /**
298       * Returns an object that lets you associate policies with the given
299       * attributes, and allow them globally or on specific elements.
300       */
301      public AttributeBuilder allowAttributes(String... attributeNames) {
302        ImmutableList.Builder<String> b = ImmutableList.builder();
303        for (String attributeName : attributeNames) {
304          b.add(HtmlLexer.canonicalName(attributeName));
305        }
306        return new AttributeBuilder(b.build());
307      }
308    
309      /**
310       * Reverse an earlier attribute {@link #allowAttributes allow}.
311       * <p>
312       * For this to have an effect you must call at least one of
313       * {@link AttributeBuilder#globally} and {@link AttributeBuilder#onElements}.
314       * <p>
315       * Attributes are disallowed by default, so there is no need to call this
316       * with a laundry list of attribute/element pairs.
317       */
318      public AttributeBuilder disallowAttributes(String... attributeNames) {
319        return this.allowAttributes(attributeNames)
320            .matching(AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY);
321      }
322    
323    
324      private HtmlPolicyBuilder allowAttributesGlobally(
325          AttributePolicy policy, List<String> attributeNames) {
326        invalidateCompiledState();
327        for (String attributeName : attributeNames) {
328          // We reinterpret the identity policy later via policy joining since its
329          // the default passed from the policy-less method, but we don't do
330          // anything here since we don't know until build() is called whether the
331          // policy author wants to allow certain URL protocols or wants to deal
332          // with styles.
333          AttributePolicy oldPolicy = globalAttrPolicies.get(attributeName);
334          globalAttrPolicies.put(
335              attributeName, AttributePolicy.Util.join(oldPolicy, policy));
336        }
337        return this;
338      }
339    
340      private HtmlPolicyBuilder allowAttributesOnElements(
341          AttributePolicy policy, List<String> attributeNames,
342          List<String> elementNames) {
343        invalidateCompiledState();
344        for (String elementName : elementNames) {
345          Map<String, AttributePolicy> policies = attrPolicies.get(elementName);
346          if (policies == null) {
347            policies = Maps.newLinkedHashMap();
348            attrPolicies.put(elementName, policies);
349          }
350          for (String attributeName : attributeNames) {
351            AttributePolicy oldPolicy = policies.get(attributeName);
352            policies.put(
353                attributeName,
354                AttributePolicy.Util.join(oldPolicy, policy));
355          }
356        }
357        return this;
358      }
359    
360      /**
361       * Adds <a href="http://en.wikipedia.org/wiki/Nofollow"><code>rel=nofollow</code></a>
362       * to links.
363       */
364      public HtmlPolicyBuilder requireRelNofollowOnLinks() {
365        invalidateCompiledState();
366        this.requireRelNofollowOnLinks = true;
367        return this;
368      }
369    
370      /**
371       * Adds to the set of protocols that are allowed in URL attributes.
372       * For each URL attribute that is allowed, we further constrain it by
373       * only allowing the value through if it specifies no protocol, or if it
374       * specifies one in the allowedProtocols white-list.
375       * This is done regardless of whether any protocols have been allowed, so
376       * allowing the attribute "href" globally with the identity policy but
377       * not white-listing any protocols, effectively disallows the "href"
378       * attribute globally.
379       * <p>
380       * Do not allow any <code>*script</code> such as <code>javascript</code>
381       * protocols if you might use this policy with untrusted code.
382       */
383      public HtmlPolicyBuilder allowUrlProtocols(String... protocols) {
384        invalidateCompiledState();
385        // If there is at least one allowed protocol, then allow URLs and
386        // add a filter that checks href and src values.
387    
388        // Do not allow href and srcs through otherwise, and only allow on images
389        // and links.
390        for (String protocol : protocols) {
391          protocol = Strings.toLowerCase(protocol);
392          allowedProtocols.add(protocol);
393        }
394        return this;
395      }
396    
397      /**
398       * Reverses a decision made by {@link #allowUrlProtocols}.
399       */
400      public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) {
401        invalidateCompiledState();
402        for (String protocol : protocols) {
403          protocol = Strings.toLowerCase(protocol);
404          allowedProtocols.remove(protocol);
405        }
406        return this;
407      }
408    
409      /**
410       * A canned URL protocol policy that allows <code>http</code>,
411       * <code>https</code>, and <code>mailto</code>.
412       */
413      public HtmlPolicyBuilder allowStandardUrlProtocols() {
414        return allowUrlProtocols("http", "https", "mailto");
415      }
416    
417      /**
418       * Convert <code>style="&lt;CSS&gt;"</code> to sanitized CSS which allows
419       * color, font-size, type-face, and other styling using the default schema;
420       * but which does not allow content to escape its clipping context.
421       */
422      public HtmlPolicyBuilder allowStyling() {
423        allowStyling(CssSchema.DEFAULT);
424        return this;
425      }
426    
427      /**
428       * Convert <code>style="&lt;CSS&gt;"</code> to sanitized CSS which allows
429       * color, font-size, type-face, and other styling using the given schema.
430       */
431      public HtmlPolicyBuilder allowStyling(CssSchema whitelist) {
432        invalidateCompiledState();
433        allowAttributesGlobally(
434            new StylingPolicy(whitelist), ImmutableList.of("style"));
435        return this;
436      }
437    
438      /**
439       * Names of attributes from HTML 4 whose values are URLs.
440       * Other attributes, e.g. <code>style</code> may contain URLs even though
441       * there values are not URLs.
442       */
443      private static final Set<String> URL_ATTRIBUTE_NAMES = ImmutableSet.of(
444          "action", "archive", "background", "cite", "classid", "codebase", "data",
445          "dsync", "formaction", "href", "icon", "longdesc", "manifest", "poster",
446          "profile", "src", "srcset", "usemap");
447    
448      /**
449       * Produces a policy based on the allow and disallow calls previously made.
450       *
451       * @param out receives calls to open only tags allowed by
452       *      previous calls to this object.
453       *      Typically a {@link HtmlStreamRenderer}.
454       */
455      public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) {
456        return toFactory().apply(out);
457      }
458    
459      /**
460       * Produces a policy based on the allow and disallow calls previously made.
461       *
462       * @param out receives calls to open only tags allowed by
463       *      previous calls to this object.
464       *      Typically a {@link HtmlStreamRenderer}.
465       * @param listener is notified of dropped tags and attributes so that
466       *      intrusion detection systems can be alerted to questionable HTML.
467       *      If {@code null} then no notifications are sent.
468       * @param context if {@code (listener != null)} then the context value passed
469       *      with alerts.  This can be used to let the listener know from which
470       *      connection or request the questionable HTML was received.
471       */
472      public <CTX> HtmlSanitizer.Policy build(
473          HtmlStreamEventReceiver out,
474          @Nullable HtmlChangeListener<? super CTX> listener,
475          @Nullable CTX context) {
476        return toFactory().apply(out, listener, context);
477      }
478    
479      /**
480       * Like {@link #build} but can be reused to create many different policies
481       * each backed by a different output channel.
482       */
483      public PolicyFactory toFactory() {
484        ImmutableSet.Builder<String> textContainers = ImmutableSet.builder();
485        for (Map.Entry<String, Boolean> textContainer
486             : this.textContainers.entrySet()) {
487          if (Boolean.TRUE.equals(textContainer.getValue())) {
488            textContainers.add(textContainer.getKey());
489          }
490        }
491        return new PolicyFactory(compilePolicies(), textContainers.build(),
492                                 ImmutableMap.copyOf(globalAttrPolicies));
493      }
494    
495      // Speed up subsequent builds by caching the compiled policies.
496      private transient ImmutableMap<String, ElementAndAttributePolicies>
497          compiledPolicies;
498    
499      /** Called by mutators to signal that any compiled policy is out-of-date. */
500      private void invalidateCompiledState() {
501        compiledPolicies = null;
502      }
503    
504      private ImmutableMap<String, ElementAndAttributePolicies> compilePolicies() {
505        if (compiledPolicies != null) { return compiledPolicies; }
506    
507        // Copy maps before normalizing in case builder is reused.
508        Map<String, ElementPolicy> elPolicies
509            = Maps.newLinkedHashMap(this.elPolicies);
510        Map<String, Map<String, AttributePolicy>> attrPolicies
511            = Maps.newLinkedHashMap(this.attrPolicies);
512        for (Map.Entry<String, Map<String, AttributePolicy>> e :
513             attrPolicies.entrySet()) {
514          e.setValue(Maps.newLinkedHashMap(e.getValue()));
515        }
516        Map<String, AttributePolicy> globalAttrPolicies
517            = Maps.newLinkedHashMap(this.globalAttrPolicies);
518        Set<String> allowedProtocols = ImmutableSet.copyOf(this.allowedProtocols);
519    
520        // Implement requireRelNofollowOnLinks
521        if (requireRelNofollowOnLinks) {
522          ElementPolicy linkPolicy = elPolicies.get("a");
523          if (linkPolicy == null) {
524            linkPolicy = ElementPolicy.REJECT_ALL_ELEMENT_POLICY;
525          }
526          elPolicies.put(
527              "a",
528              ElementPolicy.Util.join(
529                  linkPolicy,
530                  new ElementPolicy() {
531                    public String apply(String elementName, List<String> attrs) {
532                      for (int i = 0, n = attrs.size(); i < n; i += 2) {
533                        if ("href".equals(attrs.get(i))) {
534                          attrs.add("rel");
535                          attrs.add("nofollow");
536                          break;
537                        }
538                      }
539                      return elementName;
540                    }
541                  }));
542        }
543    
544        // Implement protocol policies.
545        // For each URL attribute that is allowed, we further constrain it by
546        // only allowing the value through if it specifies no protocol, or if it
547        // specifies one in the allowedProtocols white-list.
548        // This is done regardless of whether any protocols have been allowed, so
549        // allowing the attribute "href" globally with the identity policy but
550        // not white-listing any protocols, effectively disallows the "href"
551        // attribute globally.
552        {
553          AttributePolicy urlAttributePolicy;
554          if (allowedProtocols.size() == 3
555              && allowedProtocols.contains("mailto")
556              && allowedProtocols.contains("http")
557              && allowedProtocols.contains("https")) {
558            urlAttributePolicy = StandardUrlAttributePolicy.INSTANCE;
559          } else {
560            urlAttributePolicy = new FilterUrlByProtocolAttributePolicy(
561                allowedProtocols);
562          }
563          Set<String> toGuard = Sets.newLinkedHashSet(URL_ATTRIBUTE_NAMES);
564          for (String urlAttributeName : URL_ATTRIBUTE_NAMES) {
565            if (globalAttrPolicies.containsKey(urlAttributeName)) {
566              toGuard.remove(urlAttributeName);
567              globalAttrPolicies.put(urlAttributeName, AttributePolicy.Util.join(
568                  urlAttributePolicy, globalAttrPolicies.get(urlAttributeName)));
569            }
570          }
571          // Implement guards not implemented on global policies in the per-element
572          // policy maps.
573          for (Map.Entry<String, Map<String, AttributePolicy>> e
574               : attrPolicies.entrySet()) {
575            Map<String, AttributePolicy> policies = e.getValue();
576            for (String urlAttributeName : toGuard) {
577              if (policies.containsKey(urlAttributeName)) {
578                policies.put(urlAttributeName, AttributePolicy.Util.join(
579                    urlAttributePolicy, policies.get(urlAttributeName)));
580              }
581            }
582          }
583        }
584    
585        ImmutableMap.Builder<String, ElementAndAttributePolicies> policiesBuilder
586            = ImmutableMap.builder();
587        for (Map.Entry<String, ElementPolicy> e : elPolicies.entrySet()) {
588          String elementName = e.getKey();
589          ElementPolicy elPolicy = e.getValue();
590          if (ElementPolicy.REJECT_ALL_ELEMENT_POLICY.equals(elPolicy)) {
591            continue;
592          }
593    
594          Map<String, AttributePolicy> elAttrPolicies
595              = attrPolicies.get(elementName);
596          if (elAttrPolicies == null) { elAttrPolicies = ImmutableMap.of(); }
597          ImmutableMap.Builder<String, AttributePolicy> attrs
598              = ImmutableMap.builder();
599          for (Map.Entry<String, AttributePolicy> ape : elAttrPolicies.entrySet()) {
600            String attributeName = ape.getKey();
601            // Handle below so we don't end up putting the same key into the map
602            // twice.  ImmutableMap.Builder hates that.
603            if (globalAttrPolicies.containsKey(attributeName)) { continue; }
604            AttributePolicy policy = ape.getValue();
605            if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
606              attrs.put(attributeName, policy);
607            }
608          }
609          for (Map.Entry<String, AttributePolicy> ape
610               : globalAttrPolicies.entrySet()) {
611            String attributeName = ape.getKey();
612            AttributePolicy policy = AttributePolicy.Util.join(
613                elAttrPolicies.get(attributeName), ape.getValue());
614            if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
615              attrs.put(attributeName, policy);
616            }
617          }
618    
619          policiesBuilder.put(
620              elementName,
621              new ElementAndAttributePolicies(
622                  elementName,
623                  elPolicy, attrs.build(), skipIfEmpty.contains(elementName)));
624        }
625        return compiledPolicies = policiesBuilder.build();
626      }
627    
628      /**
629       * Builds the relationship between attributes, the values that they may have,
630       * and the elements on which they may appear.
631       *
632       * @author Mike Samuel
633       */
634      public final class AttributeBuilder {
635        private final List<String> attributeNames;
636        private AttributePolicy policy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY;
637    
638        AttributeBuilder(List<? extends String> attributeNames) {
639          this.attributeNames = ImmutableList.copyOf(attributeNames);
640        }
641    
642        /**
643         * Filters and/or transforms the attribute values
644         * allowed by later {@code allow*} calls.
645         * Multiple calls to {@code matching} are combined so that the policies
646         * receive the value in order, each seeing the value after any
647         * transformation by a previous policy.
648         */
649        public AttributeBuilder matching(AttributePolicy policy) {
650          this.policy = AttributePolicy.Util.join(this.policy, policy);
651          return this;
652        }
653    
654        /**
655         * Restrict the values allowed by later {@code allow*} calls to those
656         * matching the pattern.
657         * Multiple calls to {@code matching} are combined to restrict to the
658         * intersection of possible matched values.
659         */
660        public AttributeBuilder matching(final Pattern pattern) {
661          return matching(new AttributePolicy() {
662            public @Nullable String apply(
663                String elementName, String attributeName, String value) {
664              return pattern.matcher(value).matches() ? value : null;
665            }
666          });
667        }
668    
669        /**
670         * Restrict the values allowed by later {@code allow*} calls to those
671         * matching the given predicate.
672         * Multiple calls to {@code matching} are combined to restrict to the
673         * intersection of possible matched values.
674         */
675        public AttributeBuilder matching(
676            final Predicate<? super String> filter) {
677          return matching(new AttributePolicy() {
678            public @Nullable String apply(
679                String elementName, String attributeName, String value) {
680              return filter.apply(value) ? value : null;
681            }
682          });
683        }
684    
685        /**
686         * Restrict the values allowed by later {@code allow*} calls to those
687         * supplied.
688         * Multiple calls to {@code matching} are combined to restrict to the
689         * intersection of possible matched values.
690         */
691        public AttributeBuilder matching(
692            boolean ignoreCase, String... allowedValues) {
693          return matching(ignoreCase, ImmutableSet.copyOf(allowedValues));
694        }
695    
696        /**
697         * Restrict the values allowed by later {@code allow*} calls to those
698         * supplied.
699         * Multiple calls to {@code matching} are combined to restrict to the
700         * intersection of possible matched values.
701         */
702        public AttributeBuilder matching(
703            final boolean ignoreCase, Set<? extends String> allowedValues) {
704          final ImmutableSet<String> allowed = ImmutableSet.copyOf(allowedValues);
705          return matching(new AttributePolicy() {
706            public @Nullable String apply(
707                String elementName, String attributeName, String value) {
708              if (ignoreCase) { value = Strings.toLowerCase(value); }
709              return allowed.contains(value) ? value : null;
710            }
711          });
712        }
713    
714        /**
715         * Allows the given attributes on any elements but filters the
716         * attributes' values based on previous calls to {@code matching(...)}.
717         * Global attribute policies are applied after element specific policies.
718         * Be careful of using this with attributes like <code>type</code> which
719         * have different meanings on different attributes.
720         * Also be careful of allowing globally attributes like <code>href</code>
721         * which can have more far-reaching effects on tags like
722         * <code>&lt;base&gt;</code> and <code>&lt;link&gt;</code> than on
723         * <code>&lt;a&gt;</code> because in the former, they have an effect without
724         * user interaction and can change the behavior of the current page.
725         */
726        public HtmlPolicyBuilder globally() {
727          return HtmlPolicyBuilder.this.allowAttributesGlobally(
728              policy, attributeNames);
729        }
730    
731        /**
732         * Allows the named attributes on the given elements but filters the
733         * attributes' values based on previous calls to {@code matching(...)}.
734         */
735        public HtmlPolicyBuilder onElements(String... elementNames) {
736          ImmutableList.Builder<String> b = ImmutableList.builder();
737          for (String elementName : elementNames) {
738            b.add(HtmlLexer.canonicalName(elementName));
739          }
740          return HtmlPolicyBuilder.this.allowAttributesOnElements(
741              policy, attributeNames, b.build());
742        }
743      }
744    }