1// Copyright (c) 2011, Mike Samuel 2// All rights reserved. 3// 4// Redistribution and use in source and binary forms, with or without 5// modification, are permitted provided that the following conditions 6// are met: 7// 8// Redistributions of source code must retain the above copyright 9// notice, this list of conditions and the following disclaimer. 10// Redistributions in binary form must reproduce the above copyright 11// notice, this list of conditions and the following disclaimer in the 12// documentation and/or other materials provided with the distribution. 13// Neither the name of the OWASP nor the names of its contributors may 14// be used to endorse or promote products derived from this software 15// without specific prior written permission. 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27// POSSIBILITY OF SUCH DAMAGE. 28 29package org.owasp.html; 30 31import java.util.LinkedHashSet; 32import java.util.List; 33import java.util.Set; 34 35import javax.annotation.Nullable; 36 37/** 38 * Sits between the HTML parser, and then policy, and the renderer so that it 39 * can report dropped elements and attributes to an {@link HtmlChangeListener}. 40 * 41 * <pre> 42 * HtmlChangeReporter<T> hcr = new HtmlChangeReporter<T>( 43 * renderer, htmlChangeListener, context); 44 * hcr.setPolicy(policyFactory.apply(hcr.getWrappedRenderer())); 45 * HtmlSanitizer.sanitize(html, hcr.getWrappedPolicy()); 46 * </pre> 47 * 48 * The renderer receives events from the policy unchanged, but the reporter 49 * notices differences between the events from the lexer and those from the 50 * policy. 51 * 52 * @param <T> The type of context value passed to the 53 */ 54public final class HtmlChangeReporter<T> { 55 private final OutputChannel output; 56 private final InputChannel<T> input; 57 58 public HtmlChangeReporter( 59 HtmlStreamEventReceiver renderer, 60 HtmlChangeListener<? super T> listener, @Nullable T context) { 61 this.output = new OutputChannel(renderer); 62 this.input = new InputChannel<T>(output, listener, context); 63 } 64 65 /** 66 * Associates an input channel. {@code this} receives events and forwards 67 * them to input. 68 */ 69 public void setPolicy(HtmlSanitizer.Policy policy) { 70 this.input.policy = policy; 71 } 72 73 public HtmlStreamEventReceiver getWrappedRenderer() { return output; } 74 75 public HtmlSanitizer.Policy getWrappedPolicy() { return input; } 76 77 private static final class InputChannel<T> implements HtmlSanitizer.Policy { 78 HtmlStreamEventReceiver policy; 79 final OutputChannel output; 80 final T context; 81 final HtmlChangeListener<? super T> listener; 82 83 InputChannel( 84 OutputChannel output, HtmlChangeListener<? super T> listener, 85 @Nullable T context) { 86 this.output = output; 87 this.context = context; 88 this.listener = listener; 89 } 90 91 public void openDocument() { 92 policy.openDocument(); 93 } 94 95 public void closeDocument() { 96 policy.closeDocument(); 97 } 98 99 public void openTag(String elementName, List<String> attrs) { 100 output.expectedElementName = elementName; 101 output.expectedAttrNames.clear(); 102 for (int i = 0, n = attrs.size(); i < n; i += 2) { 103 output.expectedAttrNames.add(attrs.get(i)); 104 } 105 policy.openTag(elementName, attrs); 106 { 107 // Gather the notification details to avoid any problems with the 108 // listener re-entering the stream event receiver. This shouldn't 109 // occur, but if it does it will be a source of subtle confusing bugs. 110 String discardedElementName = output.expectedElementName; 111 output.expectedElementName = null; 112 int nExpected = output.expectedAttrNames.size(); 113 String[] discardedAttrNames = 114 nExpected != 0 && discardedElementName == null 115 ? output.expectedAttrNames.toArray(new String[nExpected]) 116 : ZERO_STRINGS; 117 output.expectedAttrNames.clear(); 118 // Dispatch notifications to the listener. 119 if (discardedElementName != null) { 120 listener.discardedTag(context, discardedElementName); 121 } 122 if (discardedAttrNames.length != 0) { 123 listener.discardedAttributes( 124 context, elementName, discardedAttrNames); 125 } 126 } 127 } 128 129 public void closeTag(String elementName) { 130 policy.closeTag(elementName); 131 } 132 133 public void text(String textChunk) { 134 policy.text(textChunk); 135 } 136 } 137 138 private static final class OutputChannel implements HtmlStreamEventReceiver { 139 private final HtmlStreamEventReceiver renderer; 140 String expectedElementName; 141 Set<String> expectedAttrNames = new LinkedHashSet<String>(); 142 143 OutputChannel(HtmlStreamEventReceiver renderer) { 144 this.renderer = renderer; 145 } 146 147 public void openDocument() { 148 renderer.openDocument(); 149 } 150 151 public void closeDocument() { 152 renderer.closeDocument(); 153 } 154 155 public void openTag(String elementName, List<String> attrs) { 156 if (elementName.equals(expectedElementName)) { 157 expectedElementName = null; 158 } 159 for (int i = 0, n = attrs.size(); i < n; i += 2) { 160 expectedAttrNames.remove(attrs.get(i)); 161 } 162 renderer.openTag(elementName, attrs); 163 } 164 165 public void closeTag(String elementName) { 166 renderer.closeTag(elementName); 167 } 168 169 public void text(String text) { 170 renderer.text(text); 171 } 172 } 173 174 private static final String[] ZERO_STRINGS = new String[0]; 175} 176