// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
286f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
296f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelpackage org.owasp.html;
306f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
316f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelimport java.util.LinkedHashSet;
326f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelimport java.util.List;
336f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelimport java.util.Set;
346f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
356f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelimport javax.annotation.Nullable;
366f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
376f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel/**
386f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * Sits between the HTML parser, and then policy, and the renderer so that it
396f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * can report dropped elements and attributes to an {@link HtmlChangeListener}.
406f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel *
416f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * <pre>
426f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * HtmlChangeReporter&lt;T&gt; hcr = new HtmlChangeReporter&lt;T&gt;(
436f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel *   renderer, htmlChangeListener, context);
446f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * hcr.setPolicy(policyFactory.apply(hcr.getWrappedRenderer()));
4568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel * HtmlSanitizer.sanitize(html, hcr.getWrappedPolicy());
466f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * </pre>
476f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel *
486f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * The renderer receives events from the policy unchanged, but the reporter
496f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * notices differences between the events from the lexer and those from the
506f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * policy.
516f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel *
526f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * @param <T> The type of context value passed to the
536f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel */
5468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuelpublic final class HtmlChangeReporter<T> {
556f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  private final OutputChannel output;
5668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel  private final InputChannel<T> input;
576f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
586f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  public HtmlChangeReporter(
596f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      HtmlStreamEventReceiver renderer,
606f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      HtmlChangeListener<? super T> listener, @Nullable T context) {
616f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    this.output = new OutputChannel(renderer);
6268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    this.input = new InputChannel<T>(output, listener, context);
636f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  }
646f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
656f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  /**
666f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel   * Associates an input channel.  {@code this} receives events and forwards
676f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel   * them to input.
686f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel   */
696f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  public void setPolicy(HtmlSanitizer.Policy policy) {
7068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    this.input.policy = policy;
716f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  }
726f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
736f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  public HtmlStreamEventReceiver getWrappedRenderer() { return output; }
746f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
7568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel  public HtmlSanitizer.Policy getWrappedPolicy() { return input; }
766f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
7768c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel  private static final class InputChannel<T> implements HtmlSanitizer.Policy {
7868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    HtmlStreamEventReceiver policy;
7968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    final OutputChannel output;
8068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    final T context;
8168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    final HtmlChangeListener<? super T> listener;
8268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel
8368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    InputChannel(
8468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        OutputChannel output, HtmlChangeListener<? super T> listener,
8568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        @Nullable T context) {
8668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      this.output = output;
8768c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      this.context = context;
8868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      this.listener = listener;
8968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    }
9068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel
9168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    public void openDocument() {
9268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      policy.openDocument();
9368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    }
946f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
9568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    public void closeDocument() {
9668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      policy.closeDocument();
976f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    }
9868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel
9968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    public void openTag(String elementName, List<String> attrs) {
10068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      output.expectedElementName = elementName;
1016f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      output.expectedAttrNames.clear();
10268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      for (int i = 0, n = attrs.size(); i < n; i += 2) {
10368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        output.expectedAttrNames.add(attrs.get(i));
1046f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      }
10568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      policy.openTag(elementName, attrs);
10668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      {
10768c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        // Gather the notification details to avoid any problems with the
10868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        // listener re-entering the stream event receiver.  This shouldn't
10968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        // occur, but if it does it will be a source of subtle confusing bugs.
11068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        String discardedElementName = output.expectedElementName;
11168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        output.expectedElementName = null;
11268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        int nExpected = output.expectedAttrNames.size();
11368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        String[] discardedAttrNames =
11468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel            nExpected != 0 && discardedElementName == null
11568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel            ? output.expectedAttrNames.toArray(new String[nExpected])
11668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel            : ZERO_STRINGS;
11768c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        output.expectedAttrNames.clear();
11868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        // Dispatch notifications to the listener.
11968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        if (discardedElementName != null) {
12068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel          listener.discardedTag(context, discardedElementName);
12168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        }
12268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel        if (discardedAttrNames.length != 0) {
12368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel          listener.discardedAttributes(
12468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel              context, elementName, discardedAttrNames);
1256f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel        }
1266f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      }
1276f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    }
1286f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
12968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    public void closeTag(String elementName) {
13068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      policy.closeTag(elementName);
13168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    }
1326f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
13368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    public void text(String textChunk) {
13468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel      policy.text(textChunk);
13568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel    }
1366f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  }
1376f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
1386f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  private static final class OutputChannel implements HtmlStreamEventReceiver {
1396f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    private final HtmlStreamEventReceiver renderer;
1406f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    String expectedElementName;
1416f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    Set<String> expectedAttrNames = new LinkedHashSet<String>();
1426f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
1436f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    OutputChannel(HtmlStreamEventReceiver renderer) {
1446f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      this.renderer = renderer;
1456f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    }
1466f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
1476f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    public void openDocument() {
1486f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      renderer.openDocument();
1496f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    }
1506f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
1516f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    public void closeDocument() {
1526f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      renderer.closeDocument();
1536f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    }
1546f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
1556f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    public void openTag(String elementName, List<String> attrs) {
1566f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      if (elementName.equals(expectedElementName)) {
1576f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel        expectedElementName = null;
1586f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      }
1596f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      for (int i = 0, n = attrs.size(); i < n; i += 2) {
1606f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel        expectedAttrNames.remove(attrs.get(i));
1616f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      }
1626f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      renderer.openTag(elementName, attrs);
1636f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    }
1646f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
1656f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    public void closeTag(String elementName) {
1666f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      renderer.closeTag(elementName);
1676f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    }
1686f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
1696f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    public void text(String text) {
1706f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel      renderer.text(text);
1716f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel    }
1726f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  }
1736f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel
1746f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel  private static final String[] ZERO_STRINGS = new String[0];
1756f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel}
176