16f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// Copyright (c) 2011, Mike Samuel 26f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// All rights reserved. 36f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// 46f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// Redistribution and use in source and binary forms, with or without 56f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// modification, are permitted provided that the following conditions 66f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// are met: 76f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// 86f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// Redistributions of source code must retain the above copyright 96f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// notice, this list of conditions and the following disclaimer. 106f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// Redistributions in binary form must reproduce the above copyright 116f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// notice, this list of conditions and the following disclaimer in the 126f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// documentation and/or other materials provided with the distribution. 136f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// Neither the name of the OWASP nor the names of its contributors may 146f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// be used to endorse or promote products derived from this software 156f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// without specific prior written permission. 166f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 176f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 186f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 196f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 206f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 216f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 226f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 236f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 246f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 256f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 266f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 276f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel// POSSIBILITY OF SUCH DAMAGE. 286f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 296f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelpackage org.owasp.html; 306f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 316f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelimport java.util.LinkedHashSet; 326f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelimport java.util.List; 336f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelimport java.util.Set; 346f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 356f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuelimport javax.annotation.Nullable; 366f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 376f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel/** 386f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * Sits between the HTML parser, and then policy, and the renderer so that it 396f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * can report dropped elements and attributes to an {@link HtmlChangeListener}. 406f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * 416f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * <pre> 426f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * HtmlChangeReporter<T> hcr = new HtmlChangeReporter<T>( 436f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * renderer, htmlChangeListener, context); 446f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * hcr.setPolicy(policyFactory.apply(hcr.getWrappedRenderer())); 4568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel * HtmlSanitizer.sanitize(html, hcr.getWrappedPolicy()); 466f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * </pre> 476f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * 486f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * The renderer receives events from the policy unchanged, but the reporter 496f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * notices differences between the events from the lexer and those from the 506f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * policy. 516f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * 526f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * @param <T> The type of context value passed to the 536f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel */ 5468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuelpublic final class HtmlChangeReporter<T> { 556f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel private final OutputChannel output; 5668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel private final InputChannel<T> input; 576f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 586f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel public HtmlChangeReporter( 596f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel HtmlStreamEventReceiver renderer, 606f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel HtmlChangeListener<? super T> listener, @Nullable T context) { 616f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel this.output = new OutputChannel(renderer); 6268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel this.input = new InputChannel<T>(output, listener, context); 636f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 646f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 656f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel /** 666f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * Associates an input channel. {@code this} receives events and forwards 676f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel * them to input. 686f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel */ 696f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel public void setPolicy(HtmlSanitizer.Policy policy) { 7068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel this.input.policy = policy; 716f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 726f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 736f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel public HtmlStreamEventReceiver getWrappedRenderer() { return output; } 746f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 7568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel public HtmlSanitizer.Policy getWrappedPolicy() { return input; } 766f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 7768c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel private static final class InputChannel<T> implements HtmlSanitizer.Policy { 7868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel HtmlStreamEventReceiver policy; 7968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel final OutputChannel output; 8068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel final T context; 8168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel final HtmlChangeListener<? super T> listener; 8268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel 8368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel InputChannel( 8468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel OutputChannel output, HtmlChangeListener<? super T> listener, 8568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel @Nullable T context) { 8668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel this.output = output; 8768c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel this.context = context; 8868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel this.listener = listener; 8968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel } 9068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel 9168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel public void openDocument() { 9268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel policy.openDocument(); 9368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel } 946f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 9568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel public void closeDocument() { 9668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel policy.closeDocument(); 976f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 9868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel 9968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel public void openTag(String elementName, List<String> attrs) { 10068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel output.expectedElementName = elementName; 1016f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel output.expectedAttrNames.clear(); 10268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel for (int i = 0, n = attrs.size(); i < n; i += 2) { 10368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel output.expectedAttrNames.add(attrs.get(i)); 1046f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 10568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel policy.openTag(elementName, attrs); 10668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel { 10768c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel // Gather the notification details to avoid any problems with the 10868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel // listener re-entering the stream event receiver. This shouldn't 10968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel // occur, but if it does it will be a source of subtle confusing bugs. 11068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel String discardedElementName = output.expectedElementName; 11168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel output.expectedElementName = null; 11268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel int nExpected = output.expectedAttrNames.size(); 11368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel String[] discardedAttrNames = 11468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel nExpected != 0 && discardedElementName == null 11568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel ? output.expectedAttrNames.toArray(new String[nExpected]) 11668c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel : ZERO_STRINGS; 11768c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel output.expectedAttrNames.clear(); 11868c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel // Dispatch notifications to the listener. 11968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel if (discardedElementName != null) { 12068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel listener.discardedTag(context, discardedElementName); 12168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel } 12268c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel if (discardedAttrNames.length != 0) { 12368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel listener.discardedAttributes( 12468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel context, elementName, discardedAttrNames); 1256f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1266f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1276f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1286f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 12968c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel public void closeTag(String elementName) { 13068c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel policy.closeTag(elementName); 13168c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel } 1326f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 13368c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel public void text(String textChunk) { 13468c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel policy.text(textChunk); 13568c898cc07aad9e4c616522afdd13a0cc4534117mikesamuel } 1366f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1376f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 1386f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel private static final class OutputChannel implements HtmlStreamEventReceiver { 1396f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel private final HtmlStreamEventReceiver renderer; 1406f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel String expectedElementName; 1416f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel Set<String> expectedAttrNames = new LinkedHashSet<String>(); 1426f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 1436f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel OutputChannel(HtmlStreamEventReceiver renderer) { 1446f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel this.renderer = renderer; 1456f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1466f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 1476f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel public void openDocument() { 1486f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel renderer.openDocument(); 1496f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1506f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 1516f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel public void closeDocument() { 1526f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel renderer.closeDocument(); 1536f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1546f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 1556f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel public void openTag(String elementName, List<String> attrs) { 1566f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel if (elementName.equals(expectedElementName)) { 1576f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel expectedElementName = null; 1586f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1596f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel for (int i = 0, n = attrs.size(); i < n; i += 2) { 1606f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel expectedAttrNames.remove(attrs.get(i)); 1616f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1626f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel renderer.openTag(elementName, attrs); 1636f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1646f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 1656f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel public void closeTag(String elementName) { 1666f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel renderer.closeTag(elementName); 1676f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1686f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 1696f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel public void text(String text) { 1706f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel renderer.text(text); 1716f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1726f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel } 1736f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel 1746f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel private static final String[] ZERO_STRINGS = new String[0]; 1756f2fc048ffc4ada68fabb389eb3f409229625b90mikesamuel} 176