// FilterUrlByProtocolAttributePolicy.java revision c517d7c6cadcd8643d565783464a2728be8c08d9
// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

294e867904c8295537803c1c8a076e130df5674b58mikesamuelpackage org.owasp.html;
304e867904c8295537803c1c8a076e130df5674b58mikesamuel
314e867904c8295537803c1c8a076e130df5674b58mikesamuelimport javax.annotation.Nullable;
324e867904c8295537803c1c8a076e130df5674b58mikesamuel
334e867904c8295537803c1c8a076e130df5674b58mikesamuelimport com.google.common.collect.ImmutableSet;
344e867904c8295537803c1c8a076e130df5674b58mikesamuel
354e867904c8295537803c1c8a076e130df5674b58mikesamuel/**
364e867904c8295537803c1c8a076e130df5674b58mikesamuel * An attribute policy for attributes whose values are URLs that requires that
374e867904c8295537803c1c8a076e130df5674b58mikesamuel * the value have no protocol or have an allowed protocol.
384e867904c8295537803c1c8a076e130df5674b58mikesamuel *
394e867904c8295537803c1c8a076e130df5674b58mikesamuel * <p>
404e867904c8295537803c1c8a076e130df5674b58mikesamuel * URLs with protocols must match the protocol set passed to the constructor.
414e867904c8295537803c1c8a076e130df5674b58mikesamuel * URLs without protocols but which specify an origin different from the
424e867904c8295537803c1c8a076e130df5674b58mikesamuel * containing page (e.g. {@code //example.org}) are only allowed if the
434e867904c8295537803c1c8a076e130df5674b58mikesamuel * {@link FilterUrlByProtocolAttributePolicy#allowProtocolRelativeUrls policy}
444e867904c8295537803c1c8a076e130df5674b58mikesamuel * allows both {@code http} and {@code https} which are normally used to serve
454e867904c8295537803c1c8a076e130df5674b58mikesamuel * HTML.
464e867904c8295537803c1c8a076e130df5674b58mikesamuel * Same-origin URLs, URLs without any protocol or authority part are always
474e867904c8295537803c1c8a076e130df5674b58mikesamuel * allowed.
484e867904c8295537803c1c8a076e130df5674b58mikesamuel * </p>
494e867904c8295537803c1c8a076e130df5674b58mikesamuel *
504e867904c8295537803c1c8a076e130df5674b58mikesamuel * <p>
514e867904c8295537803c1c8a076e130df5674b58mikesamuel * This class assumes that URLs are either hierarchical, or are opaque, but
524e867904c8295537803c1c8a076e130df5674b58mikesamuel * do not look like they contain an authority portion.
534e867904c8295537803c1c8a076e130df5674b58mikesamuel * </p>
544e867904c8295537803c1c8a076e130df5674b58mikesamuel *
556d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel * @author Mike Samuel <mikesamuel@gmail.com>
564e867904c8295537803c1c8a076e130df5674b58mikesamuel */
574e867904c8295537803c1c8a076e130df5674b58mikesamuel@TCB
584e867904c8295537803c1c8a076e130df5674b58mikesamuelpublic class FilterUrlByProtocolAttributePolicy implements AttributePolicy {
594e867904c8295537803c1c8a076e130df5674b58mikesamuel  private final ImmutableSet<String> protocols;
604e867904c8295537803c1c8a076e130df5674b58mikesamuel
614e867904c8295537803c1c8a076e130df5674b58mikesamuel  public FilterUrlByProtocolAttributePolicy(
624e867904c8295537803c1c8a076e130df5674b58mikesamuel      Iterable<? extends String> protocols) {
634e867904c8295537803c1c8a076e130df5674b58mikesamuel    this.protocols = ImmutableSet.copyOf(protocols);
644e867904c8295537803c1c8a076e130df5674b58mikesamuel  }
654e867904c8295537803c1c8a076e130df5674b58mikesamuel
664e867904c8295537803c1c8a076e130df5674b58mikesamuel  public @Nullable String apply(
674e867904c8295537803c1c8a076e130df5674b58mikesamuel      String elementName, String attributeName, String s) {
684e867904c8295537803c1c8a076e130df5674b58mikesamuel    protocol_loop:
694e867904c8295537803c1c8a076e130df5674b58mikesamuel    for (int i = 0, n = s.length(); i < n; ++i) {
704e867904c8295537803c1c8a076e130df5674b58mikesamuel      switch (s.charAt(i)) {
714e867904c8295537803c1c8a076e130df5674b58mikesamuel        case '/': case '#': case '?':  // No protocol.
724e867904c8295537803c1c8a076e130df5674b58mikesamuel          // Check for domain relative URLs like //www.evil.org/
734e867904c8295537803c1c8a076e130df5674b58mikesamuel          if (s.startsWith("//")
744e867904c8295537803c1c8a076e130df5674b58mikesamuel              // or the protocols by which HTML is normally served are OK.
754e867904c8295537803c1c8a076e130df5674b58mikesamuel              && !allowProtocolRelativeUrls()) {
764e867904c8295537803c1c8a076e130df5674b58mikesamuel            return null;
774e867904c8295537803c1c8a076e130df5674b58mikesamuel          }
784e867904c8295537803c1c8a076e130df5674b58mikesamuel          break protocol_loop;
794e867904c8295537803c1c8a076e130df5674b58mikesamuel        case ':':
80e7e78dd647a336268098d3438acc27ff4fcf0322mikesamuel          if (!protocols.contains(s.substring(0, i))) { return null; }
814e867904c8295537803c1c8a076e130df5674b58mikesamuel          break protocol_loop;
824e867904c8295537803c1c8a076e130df5674b58mikesamuel      }
834e867904c8295537803c1c8a076e130df5674b58mikesamuel    }
844e867904c8295537803c1c8a076e130df5674b58mikesamuel    return normalizeUri(s);
854e867904c8295537803c1c8a076e130df5674b58mikesamuel  }
864e867904c8295537803c1c8a076e130df5674b58mikesamuel
874e867904c8295537803c1c8a076e130df5674b58mikesamuel  protected boolean allowProtocolRelativeUrls() {
884e867904c8295537803c1c8a076e130df5674b58mikesamuel    return protocols.contains("http") && protocols.contains("https");
894e867904c8295537803c1c8a076e130df5674b58mikesamuel  }
904e867904c8295537803c1c8a076e130df5674b58mikesamuel
914e867904c8295537803c1c8a076e130df5674b58mikesamuel  /** Percent encodes anything that looks like a colon, or a parenthesis. */
924e867904c8295537803c1c8a076e130df5674b58mikesamuel  static String normalizeUri(String s) {
934e867904c8295537803c1c8a076e130df5674b58mikesamuel    int n = s.length();
944e867904c8295537803c1c8a076e130df5674b58mikesamuel    boolean colonsIrrelevant = false;
954e867904c8295537803c1c8a076e130df5674b58mikesamuel    for (int i = 0; i < n; ++i) {
964e867904c8295537803c1c8a076e130df5674b58mikesamuel      char ch = s.charAt(i);
974e867904c8295537803c1c8a076e130df5674b58mikesamuel      switch (ch) {
984e867904c8295537803c1c8a076e130df5674b58mikesamuel        case '/': case '#': case '?': case ':':
994e867904c8295537803c1c8a076e130df5674b58mikesamuel          colonsIrrelevant = true;
1004e867904c8295537803c1c8a076e130df5674b58mikesamuel          break;
1014e867904c8295537803c1c8a076e130df5674b58mikesamuel        case '(': case ')': case '\uff1a':
1024e867904c8295537803c1c8a076e130df5674b58mikesamuel          StringBuilder sb = new StringBuilder(n + 16);
1034e867904c8295537803c1c8a076e130df5674b58mikesamuel          int pos = 0;
1044e867904c8295537803c1c8a076e130df5674b58mikesamuel          for (; i < n; ++i) {
1054e867904c8295537803c1c8a076e130df5674b58mikesamuel            ch = s.charAt(i);
1064e867904c8295537803c1c8a076e130df5674b58mikesamuel            switch (ch) {
1074e867904c8295537803c1c8a076e130df5674b58mikesamuel              case '(':
1084e867904c8295537803c1c8a076e130df5674b58mikesamuel                sb.append(s, pos, i).append("%28");
1094e867904c8295537803c1c8a076e130df5674b58mikesamuel                pos = i + 1;
1104e867904c8295537803c1c8a076e130df5674b58mikesamuel                break;
1114e867904c8295537803c1c8a076e130df5674b58mikesamuel              case ')':
1124e867904c8295537803c1c8a076e130df5674b58mikesamuel                sb.append(s, pos, i).append("%29");
1134e867904c8295537803c1c8a076e130df5674b58mikesamuel                pos = i + 1;
1144e867904c8295537803c1c8a076e130df5674b58mikesamuel                break;
115c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel              default:
116c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                if (ch > 0x100 && !colonsIrrelevant) {
117c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                  // Other colon like characters.
1184e867904c8295537803c1c8a076e130df5674b58mikesamuel                  // TODO: do we need to encode non-colon characters if we're
1194e867904c8295537803c1c8a076e130df5674b58mikesamuel                  // not dealing with URLs that haven't been copy/pasted into
1204e867904c8295537803c1c8a076e130df5674b58mikesamuel                  // the URL bar?
1214e867904c8295537803c1c8a076e130df5674b58mikesamuel                  // Is it safe to assume UTF-8 here?
122c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                  switch (ch) {
123c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                    case '\u0589':
124c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      sb.append(s, pos, i).append("%d6%89");
125c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      pos = i + 1;
126c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      break;
127c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                    case '\u05c3':
128c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      sb.append(s, pos, i).append("%d7%83");
129c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      pos = i + 1;
130c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      break;
131c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                    case '\u2236':
132c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      sb.append(s, pos, i).append("%e2%88%b6");
133c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      pos = i + 1;
134c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      break;
135c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                    case '\uff1a':
136c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      sb.append(s, pos, i).append("%ef%bc%9a");
137c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      pos = i + 1;
138c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      break;
139c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                  }
1404e867904c8295537803c1c8a076e130df5674b58mikesamuel                }
1414e867904c8295537803c1c8a076e130df5674b58mikesamuel                break;
1424e867904c8295537803c1c8a076e130df5674b58mikesamuel            }
1434e867904c8295537803c1c8a076e130df5674b58mikesamuel          }
1444e867904c8295537803c1c8a076e130df5674b58mikesamuel          return sb.append(s, pos, n).toString();
1454e867904c8295537803c1c8a076e130df5674b58mikesamuel      }
1464e867904c8295537803c1c8a076e130df5674b58mikesamuel    }
1474e867904c8295537803c1c8a076e130df5674b58mikesamuel    return s;
1484e867904c8295537803c1c8a076e130df5674b58mikesamuel  }
1494e867904c8295537803c1c8a076e130df5674b58mikesamuel
1504e867904c8295537803c1c8a076e130df5674b58mikesamuel}