/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
1656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
1756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonpackage com.google.clearsilver.jsilver.functions.html;
1856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
1956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonimport com.google.clearsilver.jsilver.functions.TextFilter;
2056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
2156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonimport java.io.IOException;
2256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonimport java.lang.Character.UnicodeBlock;
2356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
2456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson/**
2556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Validates that a given string is either something that looks like a relative URI, or looks like
2656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * an absolute URI using one of a set of allowed schemes (http, https, ftp, mailto). If the string
2756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * is valid according to these criteria, the string is escaped with an appropriate escaping
2856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * function. Otherwise, the string "#" is returned.
2956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson *
3056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Subclasses will apply the necessary escaping function to the string by overriding {@code
3156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * applyEscaping}.
3256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson *
3356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * <p>
3456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * Note: this function does <em>not</em> validate that the URI is well-formed beyond the scheme part
3556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * (and if the URI appears to be relative, not even then). Note in particular that this function
3656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson * considers strings of the form "www.google.com:80" to be invalid.
3756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson */
3856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodsonpublic abstract class BaseUrlValidateFunction implements TextFilter {
3956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
4056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  @Override
4156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  public void filter(String in, Appendable out) throws IOException {
4256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    if (!isValidUri(in)) {
4356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      out.append('#');
4456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      return;
4556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    }
4656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    applyEscaping(in, out);
4756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  }
4856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
4956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  /**
5056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * Called by {@code filter} after verifying that the input is a valid URI. Should apply any
5156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * appropriate escaping to the input string.
5256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   *
5356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * @throws IOException
5456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   */
5556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  protected abstract void applyEscaping(String in, Appendable out) throws IOException;
5656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
5756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  /**
5856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * @return true if a given string either looks like a relative URI, or like an absolute URI with
5956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   *         an allowed scheme.
6056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   */
6156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  protected boolean isValidUri(String in) {
6256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    // Quick check for the allowed absolute URI schemes.
6356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    String maybeScheme = toLowerCaseAsciiOnly(in.substring(0, Math.min(in.length(), 8)));
6456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    if (maybeScheme.startsWith("http://") || maybeScheme.startsWith("https://")
6556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson        || maybeScheme.startsWith("ftp://") || maybeScheme.startsWith("mailto:")) {
6656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      return true;
6756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    }
6856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
6956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    // If it's an absolute URI with a different scheme, it's invalid.
7056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    // ClearSilver defines an absolute URI as one that contains a colon prior
7156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    // to any slash.
7256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    int slashPos = in.indexOf('/');
7356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    if (slashPos != -1) {
7456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      // only colons before this point are bad.
7556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      return in.lastIndexOf(':', slashPos - 1) == -1;
7656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    } else {
7756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      // then any colon is bad.
7856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      return in.indexOf(':') == -1;
7956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    }
8056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  }
8156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson
8256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  /**
8356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   * Converts an ASCII string to lowercase. Non-ASCII characters are replaced with '?'.
8456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson   */
8556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  private String toLowerCaseAsciiOnly(String string) {
8656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    char[] ca = string.toCharArray();
8756ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    for (int i = 0; i < ca.length; i++) {
8856ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      char ch = ca[i];
8956ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson      ca[i] =
9056ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson          (Character.UnicodeBlock.of(ch) == UnicodeBlock.BASIC_LATIN)
9156ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson              ? Character.toLowerCase(ch)
9256ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson              : '?';
9356ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    }
9456ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson    return new String(ca);
9556ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson  }
9656ed4167b942ec265f9cee70ac4d71d10b3835ceBen Dodson}
97