1207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux/*
2207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * Copyright (C) 2014 The Android Open Source Project
3207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux *
4207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * Licensed under the Apache License, Version 2.0 (the "License");
5207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * you may not use this file except in compliance with the License.
6207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * You may obtain a copy of the License at
7207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux *
8207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux *      http://www.apache.org/licenses/LICENSE-2.0
9207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux *
10207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * Unless required by applicable law or agreed to in writing, software
11207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * distributed under the License is distributed on an "AS IS" BASIS,
12207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * See the License for the specific language governing permissions and
14207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * limitations under the License.
15207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux */
16207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuxpackage com.android.mail.utils;
17207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
1827a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieuximport android.os.Looper;
19207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport android.util.Log;
20207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
21207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport com.android.mail.perf.Timer;
22207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport com.google.common.collect.ImmutableList;
23207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport com.google.common.collect.ImmutableSet;
24207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
25207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport org.owasp.html.AttributePolicy;
26207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport org.owasp.html.CssSchema;
27207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport org.owasp.html.ElementPolicy;
28207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport org.owasp.html.FilterUrlByProtocolAttributePolicy;
29207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport org.owasp.html.Handler;
30207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport org.owasp.html.HtmlPolicyBuilder;
31207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport org.owasp.html.HtmlStreamRenderer;
32207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport org.owasp.html.PolicyFactory;
33207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
34207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuximport java.util.List;
35207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
36207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux/**
37207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * This sanitizer is meant to strip all scripts and any malicious HTML from untrusted emails. It
38207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * uses the <a href="https://www.owasp.org/index.php/OWASP_Java_HTML_Sanitizer_Project">OWASP Java
39207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * HTML Sanitizer Project</a> to whitelist the subset of HTML elements and attributes as well as CSS
40207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * properties that are considered safe. Any unmatched HTML or CSS is discarded.
41207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux *
42207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * All URLS are scrubbed to ensure they match the blessed form of "http://the.url.here",
43207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * "https://the.url.here" or "mailto:address@server.com" and cannot resemble "javascript:badness()"
44207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux * or comparable.
45207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux */
46207a43712406ca88dcc381385ac6a276cb9417e6James Lemieuxpublic final class HtmlSanitizer {
47ccf9a34bcdcfdb884799ea3f3ecbd425970f26fcJames Lemieux
/**
 * Version of this sanitizer's configuration. Bump it each time a meaningful change is made to
 * the configuration which influences the sanitizer's output.
 *
 * <p>The value is compared against a minimum target version number. If it meets or exceeds the
 * minimum target version, the result of the sanitizer is free to be shown in a standard
 * webview. If it does not, the sanitized output is deemed untrustworthy and is shown in a
 * sandboxed webview with javascript execution disabled.
 */
public static final int VERSION = 1;
57ccf9a34bcdcfdb884799ea3f3ecbd425970f26fcJames Lemieux
/** Tag obtained from the project's {@code LogTag} helper; presumably used for log output. */
private static final String LOG_TAG = LogTag.getLogTag();
59207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
/**
 * CSS properties accepted in addition to the default whitelist from OWASP. They do not appear
 * in that default whitelist, but they improve the fidelity of the HTML display without
 * unacceptable risk. The sanitizer policy unions this schema with {@code CssSchema.DEFAULT}.
 */
private static final CssSchema ADDITIONAL_CSS = CssSchema.withProperties(ImmutableSet.of(
        "float",
        "display"
));
68207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
69207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux    /**
70207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * Translates the body tag into the div tag
71207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     */
72207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux    private static final ElementPolicy TRANSLATE_BODY_TO_DIV = new ElementPolicy() {
73207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        public String apply(String elementName, List<String> attrs) {
74207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            return "div";
75207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        }
76207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux    };
77207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
78207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux    /**
7927a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux     * Translates <div> tags surrounding quoted text into <div class="elided-text"> which allows
8027a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux     * quoted text collapsing in ConversationViewFragment.
8127a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux     */
8227a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux    private static final ElementPolicy TRANSLATE_DIV_CLASS = new ElementPolicy() {
8327a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux        public String apply(String elementName, List<String> attrs) {
8427a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            boolean showHideQuotedText = false;
8527a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux
8627a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            // check if the class attribute is listed
8727a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            final int classIndex = attrs.indexOf("class");
8827a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            if (classIndex >= 0) {
8927a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                // remove the class attribute and its value
9027a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                final String value = attrs.remove(classIndex + 1);
9127a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                attrs.remove(classIndex);
9227a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux
9327a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                // gmail and yahoo use a specific div class name to indicate quoted text
9427a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                showHideQuotedText = "gmail_quote".equals(value) || "yahoo_quoted".equals(value);
9527a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            }
9627a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux
9727a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            // check if the id attribute is listed
9827a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            final int idIndex = attrs.indexOf("id");
9927a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            if (idIndex >= 0) {
10027a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                // remove the id attribute and its value
10127a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                final String value = attrs.remove(idIndex + 1);
10227a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                attrs.remove(idIndex);
10327a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux
104ccf9a34bcdcfdb884799ea3f3ecbd425970f26fcJames Lemieux                // AOL uses a specific id value to indicate quoted text
10527a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                showHideQuotedText = value.startsWith("AOLMsgPart");
10627a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            }
10727a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux
10827a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            // insert a class attribute with a value of "elided-text" to hide/show quoted text
10927a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            if (showHideQuotedText) {
11027a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                attrs.add("class");
11127a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                attrs.add("elided-text");
11227a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            }
11327a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux
11427a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            return "div";
11527a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux        }
11627a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux    };
11727a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux
/**
 * Restricts URL values to the "http" and "https" protocols, which disallows "cid:" and
 * "mailto:" urls. The sanitizer policy applies it to URL-bearing attributes other than
 * &lt;a href&gt; and &lt;img src&gt; (for example &lt;area href&gt;, &lt;base href&gt; and
 * &lt;img longdesc&gt;).
 */
private static final AttributePolicy URL_PROTOCOLS =
        new FilterUrlByProtocolAttributePolicy(ImmutableList.of("http", "https"));
1237a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux
/**
 * Protocols accepted in the href attribute of &lt;a&gt; links: "mailto", "http" and "https".
 * The "cid:" url is disallowed on links; "mailto:" urls are allowed to support sending mail.
 */
private static final AttributePolicy A_HREF_PROTOCOLS =
        new FilterUrlByProtocolAttributePolicy(ImmutableList.of("mailto", "http", "https"));
1297a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux
/**
 * Protocols accepted in the src attribute of &lt;img&gt; tags: "cid", "http" and "https".
 * The "mailto:" url is disallowed on images so that "Show pictures" can't be used to start
 * composing a bajillion emails. "cid:" urls are allowed to support inline image attachments.
 */
private static final AttributePolicy IMG_SRC_PROTOCOLS =
        new FilterUrlByProtocolAttributePolicy(ImmutableList.of("cid", "http", "https"));
136207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
137207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux    /**
138207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * This sanitizer policy removes these elements and the content within:
139207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * <ul>
140207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>APPLET</li>
141207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>FRAMESET</li>
142207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>OBJECT</li>
143207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>SCRIPT</li>
144207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>STYLE</li>
145207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>TITLE</li>
146207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * </ul>
147207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *
148207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * This sanitizer policy removes these elements but preserves the content within:
149207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * <ul>
150207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>BASEFONT</li>
151207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>FRAME</li>
152207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>HEAD</li>
153207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>IFRAME</li>
154207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>ISINDEX</li>
155207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>LINK</li>
156207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>META</li>
157207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>NOFRAMES</li>
158207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>PARAM</li>
159207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>NOSCRIPT</li>
160207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * </ul>
161207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *
162207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * This sanitizer policy removes these attributes from all elements:
163207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * <ul>
164207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>code</li>
165207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>codebase</li>
166207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>id</li>
167207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>for</li>
168207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>headers</li>
169207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onblur</li>
170207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onchange</li>
171207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onclick</li>
172207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>ondblclick</li>
173207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onfocus</li>
174207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onkeydown</li>
175207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onkeypress</li>
176207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onkeyup</li>
177207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onload</li>
178207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onmousedown</li>
179207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onmousemove</li>
180207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onmouseout</li>
181207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onmouseover</li>
182207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onmouseup</li>
183207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onreset</li>
184207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onselect</li>
185207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onsubmit</li>
186207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>onunload</li>
187207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *     <li>tabindex</li>
188207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * </ul>
189207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     */
190207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux    private static final PolicyFactory POLICY_DEFINITION = new HtmlPolicyBuilder()
191207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowAttributes("dir").matching(true, "ltr", "rtl").globally()
192207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowUrlProtocols("cid", "http", "https", "mailto")
193207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowStyling(CssSchema.union(CssSchema.DEFAULT, ADDITIONAL_CSS))
194207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .disallowTextIn("applet", "frameset", "object", "script", "style", "title")
1957a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux            .allowElements("a")
1967a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("coords", "name", "shape").onElements("a")
1977a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("href").matching(A_HREF_PROTOCOLS).onElements("a")
198207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("abbr").allowAttributes("title").onElements("abbr")
199207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("acronym").allowAttributes("title").onElements("acronym")
200207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("address")
201207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("area")
2027a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("alt", "coords", "nohref", "name", "shape").onElements("area")
2037a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("href").matching(URL_PROTOCOLS).onElements("area")
204207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("article")
205207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("aside")
206207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("b")
2077a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux            .allowElements("base")
2087a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("href").matching(URL_PROTOCOLS).onElements("base")
209207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("bdi").allowAttributes("dir").onElements("bdi")
210207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("bdo").allowAttributes("dir").onElements("bdo")
211207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("big")
212207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("blockquote").allowAttributes("cite").onElements("blockquote")
213207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements(TRANSLATE_BODY_TO_DIV, "body")
214207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("br").allowAttributes("clear").onElements("br")
215207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("button")
216207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("autofocus", "disabled", "form", "formaction", "formenctype",
217207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                        "formmethod", "formnovalidate", "formtarget", "name", "type", "value")
218207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("button")
219207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("canvas").allowAttributes("width", "height").onElements("canvas")
220207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("caption").allowAttributes("align").onElements("caption")
221207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("center")
222207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("cite")
223207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("code")
224207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("col")
2257a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("align", "bgcolor", "char", "charoff", "span", "valign", "width")
226207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("col")
227207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("colgroup")
228207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("align", "char", "charoff", "span", "valign", "width")
229207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("colgroup")
230207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("datalist")
231207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("dd")
232207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("del").allowAttributes("cite", "datetime").onElements("del")
233207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("details")
234207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("dfn")
235207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("dir").allowAttributes("compact").onElements("dir")
23627a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            .allowElements(TRANSLATE_DIV_CLASS, "div")
23727a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux                .allowAttributes("align", "background", "class", "id")
23827a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            .onElements("div")
239207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("dl")
240207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("dt")
241207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("em")
242207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("fieldset")
243207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("disabled", "form", "name")
244207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("fieldset")
245207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("figcaption")
246207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("figure")
247207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("font").allowAttributes("color", "face", "size").onElements("font")
248207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("footer")
249207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("form")
250207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("accept", "action", "accept-charset", "autocomplete", "enctype",
251207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                        "method", "name", "novalidate", "target")
252207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("form")
253207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("header")
254207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("h1").allowAttributes("align").onElements("h1")
255207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("h2").allowAttributes("align").onElements("h2")
256207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("h3").allowAttributes("align").onElements("h3")
257207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("h4").allowAttributes("align").onElements("h4")
258207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("h5").allowAttributes("align").onElements("h5")
259207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("h6").allowAttributes("align").onElements("h6")
260207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("hr")
261207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("align", "noshade", "size", "width")
262207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("hr")
263207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("i")
264207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("img")
2657a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("src").matching(IMG_SRC_PROTOCOLS).onElements("img")
2667a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("longdesc").matching(URL_PROTOCOLS).onElements("img")
267207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("align", "alt", "border", "crossorigin", "height", "hspace",
2687a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                        "ismap", "usemap", "vspace", "width")
269207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("img")
270207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("input")
2717a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("src").matching(URL_PROTOCOLS).onElements("input")
2727a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("formaction").matching(URL_PROTOCOLS).onElements("input")
273207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("accept", "align", "alt", "autocomplete", "autofocus", "checked",
2747a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                        "disabled", "form", "formenctype", "formmethod", "formnovalidate",
2757a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                        "formtarget", "height", "list", "max", "maxlength", "min", "multiple",
2767a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                        "name", "pattern", "placeholder", "readonly", "required", "size", "step",
2777a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                        "type", "value", "width")
278207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("input")
2797a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux            .allowElements("ins")
2807a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("cite").matching(URL_PROTOCOLS).onElements("ins")
2817a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("datetime").onElements("ins")
282207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("kbd")
283207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("keygen")
284207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("autofocus", "challenge", "disabled", "form", "keytype", "name")
285207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("keygen")
286207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("label").allowAttributes("form").onElements("label")
287207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("legend").allowAttributes("align").onElements("legend")
288207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("li").allowAttributes("type", "value").onElements("li")
289207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("main")
290207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("map").allowAttributes("name").onElements("map")
291207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("mark")
292207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("menu").allowAttributes("label", "type").onElements("menu")
293207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("menuitem")
2947a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("icon").matching(URL_PROTOCOLS).onElements("menuitem")
2957a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                .allowAttributes("checked", "command", "default", "disabled", "label", "type",
2967a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux                        "radiogroup").onElements("menuitem")
297207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("meter")
298207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("form", "high", "low", "max", "min", "optimum", "value")
299207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("meter")
300207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("nav")
301207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("ol")
302207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("compact", "reversed", "start", "type")
303207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("ol")
304207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("optgroup").allowAttributes("disabled", "label").onElements("optgroup")
305207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("option")
306207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("disabled", "label", "selected", "value")
307207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("option")
308207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("output").allowAttributes("form", "name").onElements("output")
309207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("p").allowAttributes("align").onElements("p")
310207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("pre").allowAttributes("width").onElements("pre")
311207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("progress").allowAttributes("max", "value").onElements("progress")
3127a67037e84098e5d25798eb1e0e76d67275baab9James Lemieux            .allowElements("q").allowAttributes("cite").matching(URL_PROTOCOLS).onElements("q")
313207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("rp")
314207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("rt")
315207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("ruby")
316207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("s")
317207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("samp")
318207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("section")
319207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("select")
320207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("autofocus", "disabled", "form", "multiple", "name", "required",
321207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                        "size")
322207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("select")
323207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("small")
324207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("span")
325207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("strike")
326207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("strong")
327207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("sub")
328207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("summary")
329207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("sup")
330207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("table")
331207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("align", "bgcolor", "border", "cellpadding", "cellspacing",
332207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                        "frame", "rules", "sortable", "summary", "width")
333207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("table")
334207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("tbody")
335207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("align", "char", "charoff", "valign").onElements("tbody")
336207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("td")
337207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("abbr", "align", "axis", "bgcolor", "char", "charoff", "colspan",
338207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                        "height", "nowrap", "rowspan", "scope", "valign", "width")
339207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("td")
340207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("textarea")
341207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("autofocus", "cols", "disabled", "form", "maxlength", "name",
342207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                        "placeholder", "readonly", "required", "rows", "wrap")
343207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("textarea")
344207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("tfoot")
345207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("align", "char", "charoff", "valign").onElements("tfoot")
346207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("th")
347207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("abbr", "align", "axis", "bgcolor", "char", "charoff", "colspan",
348207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                        "height", "nowrap", "rowspan", "scope", "sorted", "valign", "width")
349207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .onElements("th")
350207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("thead")
351207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("align", "char", "charoff", "valign").onElements("thead")
352207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("time").allowAttributes("datetime").onElements("time")
353207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("tr")
354207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                .allowAttributes("align", "bgcolor", "char", "charoff", "valign").onElements("tr")
355207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("tt")
356207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("u")
357207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("ul").allowAttributes("compact", "type").onElements("ul")
358207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("var")
359207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .allowElements("wbr")
360207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            .toFactory();
361207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
/** Private constructor: instances cannot be created from outside this class. */
private HtmlSanitizer() {}
363207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
364207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux    /**
36527a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux     * Sanitizing email is treated as an expensive operation; this method should be called from
36627a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux     * a background Thread.
36727a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux     *
368207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * @param rawHtml the unsanitized, suspicious html
369207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     * @return the sanitized form of the <code>rawHtml</code>; <code>null</code> if
370207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     *      <code>rawHtml</code> was <code>null</code>
371207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux     */
372207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux    public static String sanitizeHtml(final String rawHtml) {
37327a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux        if (Looper.getMainLooper() == Looper.myLooper()) {
37427a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux            throw new IllegalStateException("sanitizing email should not occur on the main thread");
37527a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux        }
37627a36a6bbeebb6cfd53ad2766463d71ab4b26ce0James Lemieux
377207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        if (rawHtml == null) {
378207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            return null;
379207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        }
380207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
381207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        // create the builder into which the sanitized email will be written
382207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        final StringBuilder htmlBuilder = new StringBuilder(rawHtml.length());
383207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
384207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        // create the renderer that will write the sanitized HTML to the builder
385207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        final HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
386207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                htmlBuilder,
387207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                Handler.PROPAGATE,
388207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                // log errors resulting from exceptionally bizarre inputs
389207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                new Handler<String>() {
390207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                    public void handle(final String x) {
391207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                        Log.wtf(LOG_TAG, "Mangled HTML content cannot be parsed: " + x);
392207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                        throw new AssertionError(x);
393207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                    }
394207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux                }
395207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        );
396207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
397207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        // create a thread-specific policy
398207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        final org.owasp.html.HtmlSanitizer.Policy policy = POLICY_DEFINITION.apply(renderer);
399207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
400207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        // run the html through the sanitizer
401207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        Timer.startTiming("sanitizingHTMLEmail");
402207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        try {
403207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            org.owasp.html.HtmlSanitizer.sanitize(rawHtml, policy);
404207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        } finally {
405207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux            Timer.stopTiming("sanitizingHTMLEmail");
406207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        }
407207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux
408207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        // return the resulting HTML from the builder
409207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux        return htmlBuilder.toString();
410207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux    }
411207a43712406ca88dcc381385ac6a276cb9417e6James Lemieux}
412