001 // Copyright (c) 2011, Mike Samuel 002 // All rights reserved. 003 // 004 // Redistribution and use in source and binary forms, with or without 005 // modification, are permitted provided that the following conditions 006 // are met: 007 // 008 // Redistributions of source code must retain the above copyright 009 // notice, this list of conditions and the following disclaimer. 010 // Redistributions in binary form must reproduce the above copyright 011 // notice, this list of conditions and the following disclaimer in the 012 // documentation and/or other materials provided with the distribution. 013 // Neither the name of the OWASP nor the names of its contributors may 014 // be used to endorse or promote products derived from this software 015 // without specific prior written permission. 016 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 017 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 018 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 019 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 020 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 021 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 022 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 023 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 024 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 025 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 026 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 027 // POSSIBILITY OF SUCH DAMAGE. 028 029 package org.owasp.html; 030 031 import javax.annotation.Nullable; 032 033 import com.google.common.collect.ImmutableSet; 034 035 /** 036 * An attribute policy for attributes whose values are URLs that requires that 037 * the value have no protocol or have an allowed protocol. 038 * 039 * <p> 040 * URLs with protocols must match the protocol set passed to the constructor. 041 * URLs without protocols but which specify an origin different from the 042 * containing page (e.g. {@code //example.org}) are only allowed if the 043 * {@link FilterUrlByProtocolAttributePolicy#allowProtocolRelativeUrls policy} 044 * allows both {@code http} and {@code https} which are normally used to serve 045 * HTML. 046 * Same-origin URLs, URLs without any protocol or authority part are always 047 * allowed. 048 * </p> 049 * 050 * <p> 051 * This class assumes that URLs are either hierarchical, or are opaque, but 052 * do not look like they contain an authority portion. 053 * </p> 054 * 055 * @author Mike Samuel <mikesamuel@gmail.com> 056 */ 057 @TCB 058 public class FilterUrlByProtocolAttributePolicy implements AttributePolicy { 059 private final ImmutableSet<String> protocols; 060 061 public FilterUrlByProtocolAttributePolicy( 062 Iterable<? extends String> protocols) { 063 this.protocols = ImmutableSet.copyOf(protocols); 064 } 065 066 public @Nullable String apply( 067 String elementName, String attributeName, String s) { 068 protocol_loop: 069 for (int i = 0, n = s.length(); i < n; ++i) { 070 switch (s.charAt(i)) { 071 case '/': case '#': case '?': // No protocol. 072 // Check for domain relative URLs like //www.evil.org/ 073 if (s.startsWith("//") 074 // or the protocols by which HTML is normally served are OK. 075 && !allowProtocolRelativeUrls()) { 076 return null; 077 } 078 break protocol_loop; 079 case ':': 080 String protocol = Strings.toLowerCase(s.substring(0, i)); 081 if (!protocols.contains(protocol)) { return null; } 082 break protocol_loop; 083 } 084 } 085 return normalizeUri(s); 086 } 087 088 protected boolean allowProtocolRelativeUrls() { 089 return protocols.contains("http") && protocols.contains("https"); 090 } 091 092 /** Percent encodes anything that looks like a colon, or a parenthesis. */ 093 static String normalizeUri(String s) { 094 int n = s.length(); 095 boolean colonsIrrelevant = false; 096 for (int i = 0; i < n; ++i) { 097 char ch = s.charAt(i); 098 switch (ch) { 099 case '/': case '#': case '?': case ':': 100 colonsIrrelevant = true; 101 break; 102 case '(': case ')': case '\uff1a': 103 StringBuilder sb = new StringBuilder(n + 16); 104 int pos = 0; 105 for (; i < n; ++i) { 106 ch = s.charAt(i); 107 switch (ch) { 108 case '(': 109 sb.append(s, pos, i).append("%28"); 110 pos = i + 1; 111 break; 112 case ')': 113 sb.append(s, pos, i).append("%29"); 114 pos = i + 1; 115 break; 116 default: 117 if (ch > 0x100 && !colonsIrrelevant) { 118 // Other colon like characters. 119 // TODO: do we need to encode non-colon characters if we're 120 // not dealing with URLs that haven't been copy/pasted into 121 // the URL bar? 122 // Is it safe to assume UTF-8 here? 123 switch (ch) { 124 case '\u0589': 125 sb.append(s, pos, i).append("%d6%89"); 126 pos = i + 1; 127 break; 128 case '\u05c3': 129 sb.append(s, pos, i).append("%d7%83"); 130 pos = i + 1; 131 break; 132 case '\u2236': 133 sb.append(s, pos, i).append("%e2%88%b6"); 134 pos = i + 1; 135 break; 136 case '\uff1a': 137 sb.append(s, pos, i).append("%ef%bc%9a"); 138 pos = i + 1; 139 break; 140 } 141 } 142 break; 143 } 144 } 145 return sb.append(s, pos, n).toString(); 146 } 147 } 148 return s; 149 } 150 151 @Override 152 public boolean equals(Object o) { 153 return o != null && this.getClass() == o.getClass() 154 && protocols.equals(((FilterUrlByProtocolAttributePolicy) o).protocols); 155 } 156 157 @Override 158 public int hashCode() { 159 return protocols.hashCode(); 160 } 161 162 }