// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package org.owasp.html;

import javax.annotation.Nullable;

import com.google.common.collect.ImmutableSet;

354e867904c8295537803c1c8a076e130df5674b58mikesamuel/**
364e867904c8295537803c1c8a076e130df5674b58mikesamuel * An attribute policy for attributes whose values are URLs that requires that
374e867904c8295537803c1c8a076e130df5674b58mikesamuel * the value have no protocol or have an allowed protocol.
384e867904c8295537803c1c8a076e130df5674b58mikesamuel *
394e867904c8295537803c1c8a076e130df5674b58mikesamuel * <p>
404e867904c8295537803c1c8a076e130df5674b58mikesamuel * URLs with protocols must match the protocol set passed to the constructor.
414e867904c8295537803c1c8a076e130df5674b58mikesamuel * URLs without protocols but which specify an origin different from the
424e867904c8295537803c1c8a076e130df5674b58mikesamuel * containing page (e.g. {@code //example.org}) are only allowed if the
434e867904c8295537803c1c8a076e130df5674b58mikesamuel * {@link FilterUrlByProtocolAttributePolicy#allowProtocolRelativeUrls policy}
444e867904c8295537803c1c8a076e130df5674b58mikesamuel * allows both {@code http} and {@code https} which are normally used to serve
454e867904c8295537803c1c8a076e130df5674b58mikesamuel * HTML.
464e867904c8295537803c1c8a076e130df5674b58mikesamuel * Same-origin URLs, URLs without any protocol or authority part are always
474e867904c8295537803c1c8a076e130df5674b58mikesamuel * allowed.
484e867904c8295537803c1c8a076e130df5674b58mikesamuel * </p>
494e867904c8295537803c1c8a076e130df5674b58mikesamuel *
504e867904c8295537803c1c8a076e130df5674b58mikesamuel * <p>
514e867904c8295537803c1c8a076e130df5674b58mikesamuel * This class assumes that URLs are either hierarchical, or are opaque, but
524e867904c8295537803c1c8a076e130df5674b58mikesamuel * do not look like they contain an authority portion.
534e867904c8295537803c1c8a076e130df5674b58mikesamuel * </p>
544e867904c8295537803c1c8a076e130df5674b58mikesamuel *
556d8c2e9241d042a3e0bff40dac4c388966ad060cmikesamuel * @author Mike Samuel <mikesamuel@gmail.com>
564e867904c8295537803c1c8a076e130df5674b58mikesamuel */
574e867904c8295537803c1c8a076e130df5674b58mikesamuel@TCB
584e867904c8295537803c1c8a076e130df5674b58mikesamuelpublic class FilterUrlByProtocolAttributePolicy implements AttributePolicy {
594e867904c8295537803c1c8a076e130df5674b58mikesamuel  private final ImmutableSet<String> protocols;
604e867904c8295537803c1c8a076e130df5674b58mikesamuel
614e867904c8295537803c1c8a076e130df5674b58mikesamuel  public FilterUrlByProtocolAttributePolicy(
624e867904c8295537803c1c8a076e130df5674b58mikesamuel      Iterable<? extends String> protocols) {
634e867904c8295537803c1c8a076e130df5674b58mikesamuel    this.protocols = ImmutableSet.copyOf(protocols);
644e867904c8295537803c1c8a076e130df5674b58mikesamuel  }
654e867904c8295537803c1c8a076e130df5674b58mikesamuel
664e867904c8295537803c1c8a076e130df5674b58mikesamuel  public @Nullable String apply(
674e867904c8295537803c1c8a076e130df5674b58mikesamuel      String elementName, String attributeName, String s) {
684e867904c8295537803c1c8a076e130df5674b58mikesamuel    protocol_loop:
694e867904c8295537803c1c8a076e130df5674b58mikesamuel    for (int i = 0, n = s.length(); i < n; ++i) {
704e867904c8295537803c1c8a076e130df5674b58mikesamuel      switch (s.charAt(i)) {
714e867904c8295537803c1c8a076e130df5674b58mikesamuel        case '/': case '#': case '?':  // No protocol.
724e867904c8295537803c1c8a076e130df5674b58mikesamuel          // Check for domain relative URLs like //www.evil.org/
734e867904c8295537803c1c8a076e130df5674b58mikesamuel          if (s.startsWith("//")
744e867904c8295537803c1c8a076e130df5674b58mikesamuel              // or the protocols by which HTML is normally served are OK.
754e867904c8295537803c1c8a076e130df5674b58mikesamuel              && !allowProtocolRelativeUrls()) {
764e867904c8295537803c1c8a076e130df5674b58mikesamuel            return null;
774e867904c8295537803c1c8a076e130df5674b58mikesamuel          }
784e867904c8295537803c1c8a076e130df5674b58mikesamuel          break protocol_loop;
794e867904c8295537803c1c8a076e130df5674b58mikesamuel        case ':':
807d0755627f174ec9d5f148bd9fa3a5cc732edb3fmikesamuel          String protocol = Strings.toLowerCase(s.substring(0, i));
817d0755627f174ec9d5f148bd9fa3a5cc732edb3fmikesamuel          if (!protocols.contains(protocol)) { return null; }
824e867904c8295537803c1c8a076e130df5674b58mikesamuel          break protocol_loop;
834e867904c8295537803c1c8a076e130df5674b58mikesamuel      }
844e867904c8295537803c1c8a076e130df5674b58mikesamuel    }
854e867904c8295537803c1c8a076e130df5674b58mikesamuel    return normalizeUri(s);
864e867904c8295537803c1c8a076e130df5674b58mikesamuel  }
874e867904c8295537803c1c8a076e130df5674b58mikesamuel
884e867904c8295537803c1c8a076e130df5674b58mikesamuel  protected boolean allowProtocolRelativeUrls() {
894e867904c8295537803c1c8a076e130df5674b58mikesamuel    return protocols.contains("http") && protocols.contains("https");
904e867904c8295537803c1c8a076e130df5674b58mikesamuel  }
914e867904c8295537803c1c8a076e130df5674b58mikesamuel
924e867904c8295537803c1c8a076e130df5674b58mikesamuel  /** Percent encodes anything that looks like a colon, or a parenthesis. */
934e867904c8295537803c1c8a076e130df5674b58mikesamuel  static String normalizeUri(String s) {
944e867904c8295537803c1c8a076e130df5674b58mikesamuel    int n = s.length();
954e867904c8295537803c1c8a076e130df5674b58mikesamuel    boolean colonsIrrelevant = false;
964e867904c8295537803c1c8a076e130df5674b58mikesamuel    for (int i = 0; i < n; ++i) {
974e867904c8295537803c1c8a076e130df5674b58mikesamuel      char ch = s.charAt(i);
984e867904c8295537803c1c8a076e130df5674b58mikesamuel      switch (ch) {
994e867904c8295537803c1c8a076e130df5674b58mikesamuel        case '/': case '#': case '?': case ':':
1004e867904c8295537803c1c8a076e130df5674b58mikesamuel          colonsIrrelevant = true;
1014e867904c8295537803c1c8a076e130df5674b58mikesamuel          break;
1024e867904c8295537803c1c8a076e130df5674b58mikesamuel        case '(': case ')': case '\uff1a':
1034e867904c8295537803c1c8a076e130df5674b58mikesamuel          StringBuilder sb = new StringBuilder(n + 16);
1044e867904c8295537803c1c8a076e130df5674b58mikesamuel          int pos = 0;
1054e867904c8295537803c1c8a076e130df5674b58mikesamuel          for (; i < n; ++i) {
1064e867904c8295537803c1c8a076e130df5674b58mikesamuel            ch = s.charAt(i);
1074e867904c8295537803c1c8a076e130df5674b58mikesamuel            switch (ch) {
1084e867904c8295537803c1c8a076e130df5674b58mikesamuel              case '(':
1094e867904c8295537803c1c8a076e130df5674b58mikesamuel                sb.append(s, pos, i).append("%28");
1104e867904c8295537803c1c8a076e130df5674b58mikesamuel                pos = i + 1;
1114e867904c8295537803c1c8a076e130df5674b58mikesamuel                break;
1124e867904c8295537803c1c8a076e130df5674b58mikesamuel              case ')':
1134e867904c8295537803c1c8a076e130df5674b58mikesamuel                sb.append(s, pos, i).append("%29");
1144e867904c8295537803c1c8a076e130df5674b58mikesamuel                pos = i + 1;
1154e867904c8295537803c1c8a076e130df5674b58mikesamuel                break;
116c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel              default:
117c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                if (ch > 0x100 && !colonsIrrelevant) {
118c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                  // Other colon like characters.
1194e867904c8295537803c1c8a076e130df5674b58mikesamuel                  // TODO: do we need to encode non-colon characters if we're
1204e867904c8295537803c1c8a076e130df5674b58mikesamuel                  // not dealing with URLs that haven't been copy/pasted into
1214e867904c8295537803c1c8a076e130df5674b58mikesamuel                  // the URL bar?
1224e867904c8295537803c1c8a076e130df5674b58mikesamuel                  // Is it safe to assume UTF-8 here?
123c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                  switch (ch) {
124c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                    case '\u0589':
125c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      sb.append(s, pos, i).append("%d6%89");
126c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      pos = i + 1;
127c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      break;
128c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                    case '\u05c3':
129c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      sb.append(s, pos, i).append("%d7%83");
130c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      pos = i + 1;
131c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      break;
132c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                    case '\u2236':
133c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      sb.append(s, pos, i).append("%e2%88%b6");
134c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      pos = i + 1;
135c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      break;
136c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                    case '\uff1a':
137c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      sb.append(s, pos, i).append("%ef%bc%9a");
138c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      pos = i + 1;
139c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                      break;
140c517d7c6cadcd8643d565783464a2728be8c08d9mikesamuel                  }
1414e867904c8295537803c1c8a076e130df5674b58mikesamuel                }
1424e867904c8295537803c1c8a076e130df5674b58mikesamuel                break;
1434e867904c8295537803c1c8a076e130df5674b58mikesamuel            }
1444e867904c8295537803c1c8a076e130df5674b58mikesamuel          }
1454e867904c8295537803c1c8a076e130df5674b58mikesamuel          return sb.append(s, pos, n).toString();
1464e867904c8295537803c1c8a076e130df5674b58mikesamuel      }
1474e867904c8295537803c1c8a076e130df5674b58mikesamuel    }
1484e867904c8295537803c1c8a076e130df5674b58mikesamuel    return s;
1494e867904c8295537803c1c8a076e130df5674b58mikesamuel  }
1504e867904c8295537803c1c8a076e130df5674b58mikesamuel
1519c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel  @Override
1529c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel  public boolean equals(Object o) {
1539c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel    return o != null && this.getClass() == o.getClass()
1549c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel        && protocols.equals(((FilterUrlByProtocolAttributePolicy) o).protocols);
1559c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel  }
1569c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel
1579c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel  @Override
1589c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel  public int hashCode() {
1599c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel    return protocols.hashCode();
1609c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel  }
1619c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel
1629c0798e090ee7db347657ed2b8604ce26fbe74d1mikesamuel}
163