1<HTML>
2<BODY BGCOLOR="white">
3<PRE>
4<FONT color="green">001</FONT>    // Copyright (c) 2011, Mike Samuel<a name="line.1"></a>
5<FONT color="green">002</FONT>    // All rights reserved.<a name="line.2"></a>
6<FONT color="green">003</FONT>    //<a name="line.3"></a>
7<FONT color="green">004</FONT>    // Redistribution and use in source and binary forms, with or without<a name="line.4"></a>
8<FONT color="green">005</FONT>    // modification, are permitted provided that the following conditions<a name="line.5"></a>
9<FONT color="green">006</FONT>    // are met:<a name="line.6"></a>
10<FONT color="green">007</FONT>    //<a name="line.7"></a>
11<FONT color="green">008</FONT>    // Redistributions of source code must retain the above copyright<a name="line.8"></a>
12<FONT color="green">009</FONT>    // notice, this list of conditions and the following disclaimer.<a name="line.9"></a>
13<FONT color="green">010</FONT>    // Redistributions in binary form must reproduce the above copyright<a name="line.10"></a>
14<FONT color="green">011</FONT>    // notice, this list of conditions and the following disclaimer in the<a name="line.11"></a>
15<FONT color="green">012</FONT>    // documentation and/or other materials provided with the distribution.<a name="line.12"></a>
16<FONT color="green">013</FONT>    // Neither the name of the OWASP nor the names of its contributors may<a name="line.13"></a>
17<FONT color="green">014</FONT>    // be used to endorse or promote products derived from this software<a name="line.14"></a>
18<FONT color="green">015</FONT>    // without specific prior written permission.<a name="line.15"></a>
19<FONT color="green">016</FONT>    // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS<a name="line.16"></a>
20<FONT color="green">017</FONT>    // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT<a name="line.17"></a>
21<FONT color="green">018</FONT>    // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS<a name="line.18"></a>
22<FONT color="green">019</FONT>    // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE<a name="line.19"></a>
23<FONT color="green">020</FONT>    // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,<a name="line.20"></a>
24<FONT color="green">021</FONT>    // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,<a name="line.21"></a>
25<FONT color="green">022</FONT>    // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;<a name="line.22"></a>
26<FONT color="green">023</FONT>    // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER<a name="line.23"></a>
27<FONT color="green">024</FONT>    // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT<a name="line.24"></a>
28<FONT color="green">025</FONT>    // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN<a name="line.25"></a>
29<FONT color="green">026</FONT>    // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE<a name="line.26"></a>
30<FONT color="green">027</FONT>    // POSSIBILITY OF SUCH DAMAGE.<a name="line.27"></a>
31<FONT color="green">028</FONT>    <a name="line.28"></a>
32<FONT color="green">029</FONT>    package org.owasp.html;<a name="line.29"></a>
33<FONT color="green">030</FONT>    <a name="line.30"></a>
34<FONT color="green">031</FONT>    import javax.annotation.Nullable;<a name="line.31"></a>
35<FONT color="green">032</FONT>    <a name="line.32"></a>
36<FONT color="green">033</FONT>    import com.google.common.collect.ImmutableSet;<a name="line.33"></a>
37<FONT color="green">034</FONT>    <a name="line.34"></a>
38<FONT color="green">035</FONT>    /**<a name="line.35"></a>
39<FONT color="green">036</FONT>     * An attribute policy for attributes whose values are URLs that requires that<a name="line.36"></a>
40<FONT color="green">037</FONT>     * the value have no protocol or have an allowed protocol.<a name="line.37"></a>
41<FONT color="green">038</FONT>     *<a name="line.38"></a>
42<FONT color="green">039</FONT>     * &lt;p&gt;<a name="line.39"></a>
43<FONT color="green">040</FONT>     * URLs with protocols must match the protocol set passed to the constructor.<a name="line.40"></a>
44<FONT color="green">041</FONT>     * URLs without protocols but which specify an origin different from the<a name="line.41"></a>
45<FONT color="green">042</FONT>     * containing page (e.g. {@code //example.org}) are only allowed if the<a name="line.42"></a>
46<FONT color="green">043</FONT>     * {@link FilterUrlByProtocolAttributePolicy#allowProtocolRelativeUrls policy}<a name="line.43"></a>
47<FONT color="green">044</FONT>     * allows both {@code http} and {@code https} which are normally used to serve<a name="line.44"></a>
48<FONT color="green">045</FONT>     * HTML.<a name="line.45"></a>
49<FONT color="green">046</FONT>     * Same-origin URLs, i.e. URLs without any protocol or authority part, are<a name="line.46"></a>
50<FONT color="green">047</FONT>     * always allowed.<a name="line.47"></a>
51<FONT color="green">048</FONT>     * &lt;/p&gt;<a name="line.48"></a>
52<FONT color="green">049</FONT>     *<a name="line.49"></a>
53<FONT color="green">050</FONT>     * &lt;p&gt;<a name="line.50"></a>
54<FONT color="green">051</FONT>     * This class assumes that URLs are either hierarchical, or are opaque, but<a name="line.51"></a>
55<FONT color="green">052</FONT>     * do not look like they contain an authority portion.<a name="line.52"></a>
56<FONT color="green">053</FONT>     * &lt;/p&gt;<a name="line.53"></a>
57<FONT color="green">054</FONT>     *<a name="line.54"></a>
58<FONT color="green">055</FONT>     * @author Mike Samuel &lt;mikesamuel@gmail.com&gt;<a name="line.55"></a>
59<FONT color="green">056</FONT>     */<a name="line.56"></a>
60<FONT color="green">057</FONT>    @TCB<a name="line.57"></a>
61<FONT color="green">058</FONT>    public class FilterUrlByProtocolAttributePolicy implements AttributePolicy {<a name="line.58"></a>
62<FONT color="green">059</FONT>      /** Allowed protocol names, without the trailing colon. */<a name="line.59"></a>
63<FONT color="green">060</FONT>      private final ImmutableSet&lt;String&gt; protocols;<a name="line.60"></a>
64<FONT color="green">061</FONT>    <a name="line.61"></a>
65<FONT color="green">062</FONT>      /** @param protocols the protocol names to allow, e.g. {@code "https"}. */<a name="line.62"></a>
66<FONT color="green">063</FONT>      public FilterUrlByProtocolAttributePolicy(<a name="line.63"></a>
67<FONT color="green">064</FONT>          Iterable&lt;? extends String&gt; protocols) {<a name="line.64"></a>
68<FONT color="green">065</FONT>        this.protocols = ImmutableSet.copyOf(protocols);<a name="line.65"></a>
69<FONT color="green">066</FONT>      }<a name="line.66"></a>
70<FONT color="green">067</FONT>    <a name="line.67"></a>
71<FONT color="green">068</FONT>      /**<a name="line.68"></a>
72<FONT color="green">069</FONT>       * Checks the protocol of {@code s}, i.e. the text before the first<a name="line.69"></a>
73<FONT color="green">070</FONT>       * {@code :} that is not preceded by {@code /}, {@code #} or {@code ?}.<a name="line.70"></a>
74<FONT color="green">071</FONT>       *<a name="line.71"></a>
75<FONT color="green">072</FONT>       * @return the URL as encoded by {@code normalizeUri}, or {@code null} if its<a name="line.72"></a>
76<FONT color="green">073</FONT>       *     protocol is not allowed or it is a rejected protocol-relative URL.<a name="line.73"></a>
77<FONT color="green">074</FONT>       */<a name="line.74"></a>
78<FONT color="green">075</FONT>      public @Nullable String apply(<a name="line.75"></a>
79<FONT color="green">076</FONT>          String elementName, String attributeName, String s) {<a name="line.76"></a>
80<FONT color="green">077</FONT>        protocol_loop:<a name="line.77"></a>
81<FONT color="green">078</FONT>        for (int i = 0, n = s.length(); i &lt; n; ++i) {<a name="line.78"></a>
82<FONT color="green">079</FONT>          switch (s.charAt(i)) {<a name="line.79"></a>
83<FONT color="green">080</FONT>            case '/': case '#': case '?':  // No protocol.<a name="line.80"></a>
84<FONT color="green">081</FONT>              // Protocol-relative URLs like //www.evil.org/ name another origin,<a name="line.81"></a>
85<FONT color="green">082</FONT>              // so they pass only if the usual protocols for HTML are allowed.<a name="line.82"></a>
86<FONT color="green">083</FONT>              if (s.startsWith("//") &amp;&amp; !allowProtocolRelativeUrls()) {<a name="line.83"></a>
87<FONT color="green">084</FONT>                return null;<a name="line.84"></a>
88<FONT color="green">085</FONT>              }<a name="line.85"></a>
89<FONT color="green">086</FONT>              break protocol_loop;<a name="line.86"></a>
90<FONT color="green">087</FONT>            case ':':<a name="line.87"></a>
91<FONT color="green">088</FONT>              String protocol = Strings.toLowerCase(s.substring(0, i));<a name="line.88"></a>
92<FONT color="green">089</FONT>              if (!protocols.contains(protocol)) { return null; }<a name="line.89"></a>
93<FONT color="green">090</FONT>              break protocol_loop;<a name="line.90"></a>
94<FONT color="green">091</FONT>            default: break;  // Still inside a potential protocol.<a name="line.91"></a>
95<FONT color="green">092</FONT>          }<a name="line.92"></a>
96<FONT color="green">093</FONT>        }<a name="line.93"></a>
97<FONT color="green">094</FONT>        return normalizeUri(s);<a name="line.94"></a>
98<FONT color="green">095</FONT>      }<a name="line.95"></a>
99<FONT color="green">096</FONT>    <a name="line.96"></a>
100<FONT color="green">097</FONT>      /** Whether URLs that start with {@code //} may pass {@code apply}. */<a name="line.97"></a>
101<FONT color="green">098</FONT>      protected boolean allowProtocolRelativeUrls() {<a name="line.98"></a>
102<FONT color="green">099</FONT>        return protocols.contains("http") &amp;&amp; protocols.contains("https");<a name="line.99"></a>
103<FONT color="green">100</FONT>      }<a name="line.100"></a>
104<FONT color="green">101</FONT>    <a name="line.101"></a>
105<FONT color="green">102</FONT>      /**<a name="line.102"></a>
106<FONT color="green">103</FONT>       * Percent-encodes parentheses and, before the first {@code /}, {@code #},<a name="line.103"></a>
107<FONT color="green">104</FONT>       * {@code ?} or {@code :}, characters that look like a colon.<a name="line.104"></a>
108<FONT color="green">105</FONT>       *<a name="line.105"></a>
109<FONT color="green">106</FONT>       * &lt;p&gt;Every look-alike ahead of the first delimiter is encoded, whether or<a name="line.106"></a>
110<FONT color="green">107</FONT>       * not a parenthesis precedes it; after a delimiter they are left alone.&lt;/p&gt;<a name="line.107"></a>
111<FONT color="green">108</FONT>       *<a name="line.108"></a>
112<FONT color="green">109</FONT>       * @param s the URL to normalize.<a name="line.109"></a>
113<FONT color="green">110</FONT>       * @return {@code s} itself if nothing was encoded, else the encoded copy.<a name="line.110"></a>
114<FONT color="green">111</FONT>       */<a name="line.111"></a>
115<FONT color="green">112</FONT>      static String normalizeUri(String s) {<a name="line.112"></a>
116<FONT color="green">113</FONT>        int n = s.length();<a name="line.113"></a>
117<FONT color="green">114</FONT>        StringBuilder sb = null;  // Allocated on the first replacement.<a name="line.114"></a>
118<FONT color="green">115</FONT>        int pos = 0;  // Start of the run of s not yet copied into sb.<a name="line.115"></a>
119<FONT color="green">116</FONT>        boolean colonsIrrelevant = false;<a name="line.116"></a>
120<FONT color="green">117</FONT>        for (int i = 0; i &lt; n; ++i) {<a name="line.117"></a>
121<FONT color="green">118</FONT>          String replacement = null;<a name="line.118"></a>
122<FONT color="green">119</FONT>          switch (s.charAt(i)) {<a name="line.119"></a>
123<FONT color="green">120</FONT>            case '/': case '#': case '?': case ':':<a name="line.120"></a>
124<FONT color="green">121</FONT>              colonsIrrelevant = true;<a name="line.121"></a>
125<FONT color="green">122</FONT>              break;<a name="line.122"></a>
126<FONT color="green">123</FONT>            case '(': replacement = "%28"; break;<a name="line.123"></a>
127<FONT color="green">124</FONT>            case ')': replacement = "%29"; break;<a name="line.124"></a>
128<FONT color="green">125</FONT>            // Colon look-alikes map to their percent-encoded UTF-8 bytes.<a name="line.125"></a>
129<FONT color="green">126</FONT>            // TODO: Is it safe to assume UTF-8 here?<a name="line.126"></a>
130<FONT color="green">127</FONT>            case '\u0589':<a name="line.127"></a>
131<FONT color="green">128</FONT>              if (!colonsIrrelevant) { replacement = "%d6%89"; }<a name="line.128"></a>
132<FONT color="green">129</FONT>              break;<a name="line.129"></a>
133<FONT color="green">130</FONT>            case '\u05c3':<a name="line.130"></a>
134<FONT color="green">131</FONT>              if (!colonsIrrelevant) { replacement = "%d7%83"; }<a name="line.131"></a>
135<FONT color="green">132</FONT>              break;<a name="line.132"></a>
136<FONT color="green">133</FONT>            case '\u2236':<a name="line.133"></a>
137<FONT color="green">134</FONT>              if (!colonsIrrelevant) { replacement = "%e2%88%b6"; }<a name="line.134"></a>
138<FONT color="green">135</FONT>              break;<a name="line.135"></a>
139<FONT color="green">136</FONT>            case '\uff1a':<a name="line.136"></a>
140<FONT color="green">137</FONT>              if (!colonsIrrelevant) { replacement = "%ef%bc%9a"; }<a name="line.137"></a>
141<FONT color="green">138</FONT>              break;<a name="line.138"></a>
142<FONT color="green">139</FONT>          }<a name="line.139"></a>
143<FONT color="green">140</FONT>          if (replacement != null) {<a name="line.140"></a>
144<FONT color="green">141</FONT>            if (sb == null) { sb = new StringBuilder(n + 16); }<a name="line.141"></a>
145<FONT color="green">142</FONT>            sb.append(s, pos, i).append(replacement);<a name="line.142"></a>
146<FONT color="green">143</FONT>            pos = i + 1;<a name="line.143"></a>
147<FONT color="green">144</FONT>          }<a name="line.144"></a>
148<FONT color="green">145</FONT>        }<a name="line.145"></a>
149<FONT color="green">146</FONT>        return sb == null ? s : sb.append(s, pos, n).toString();<a name="line.146"></a>
150<FONT color="green">147</FONT>      }<a name="line.147"></a>
151<FONT color="green">148</FONT>    <a name="line.148"></a>
152<FONT color="green">149</FONT>      /** Equal iff {@code o} has this exact class and an equal protocol set. */<a name="line.149"></a>
153<FONT color="green">150</FONT>      @Override<a name="line.150"></a>
154<FONT color="green">151</FONT>      public boolean equals(Object o) {<a name="line.151"></a>
155<FONT color="green">152</FONT>        return o != null &amp;&amp; this.getClass() == o.getClass()<a name="line.152"></a>
156<FONT color="green">153</FONT>            &amp;&amp; protocols.equals(((FilterUrlByProtocolAttributePolicy) o).protocols);<a name="line.153"></a>
157<FONT color="green">154</FONT>      }<a name="line.154"></a>
158<FONT color="green">155</FONT>    <a name="line.155"></a>
159<FONT color="green">156</FONT>      /** Hashes only the protocol set, in keeping with {@code equals}. */<a name="line.156"></a>
160<FONT color="green">157</FONT>      @Override<a name="line.157"></a>
161<FONT color="green">158</FONT>      public int hashCode() {<a name="line.158"></a>
162<FONT color="green">159</FONT>        return protocols.hashCode();<a name="line.159"></a>
163<FONT color="green">160</FONT>      }<a name="line.160"></a>
164<FONT color="green">161</FONT>    <a name="line.161"></a>
165<FONT color="green">162</FONT>    }<a name="line.162"></a>
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226</PRE>
227</BODY>
228</HTML>
229