HtmlUtils.java revision 783e81cd330a20eb44e5ab81ba5d5c0df5152450
1/**
2 * Copyright (c) 2014, Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.mail.utils;
18
19import android.graphics.Color;
20import android.graphics.Typeface;
21import android.text.SpannableStringBuilder;
22import android.text.Spanned;
23import android.text.style.AbsoluteSizeSpan;
24import android.text.style.ForegroundColorSpan;
25import android.text.style.QuoteSpan;
26import android.text.style.StyleSpan;
27import android.text.style.TypefaceSpan;
28import android.text.style.URLSpan;
29import android.text.style.UnderlineSpan;
30
31import com.android.mail.analytics.AnalyticsTimer;
32import com.google.android.mail.common.base.CharMatcher;
33import com.google.android.mail.common.html.parser.HTML;
34import com.google.android.mail.common.html.parser.HTML4;
35import com.google.android.mail.common.html.parser.HtmlDocument;
36import com.google.android.mail.common.html.parser.HtmlTree;
37import com.google.common.collect.Lists;
38
39import java.util.LinkedList;
40
41public class HtmlUtils {
42
43    /**
44     * Use our custom SpannedConverter to process the HtmlNode results from HtmlTree.
45     * @param html
46     * @return processed HTML as a Spanned
47     */
48    public static Spanned htmlToSpan(String html, HtmlTree.ConverterFactory factory) {
49        AnalyticsTimer.getInstance().trackStart(AnalyticsTimer.COMPOSE_HTML_TO_SPAN);
50        // Get the html "tree"
51        final HtmlTree htmlTree = com.android.mail.utils.Utils.getHtmlTree(html);
52        htmlTree.setConverterFactory(factory);
53        final Spanned spanned = htmlTree.getSpanned();
54        AnalyticsTimer.getInstance().logDuration(AnalyticsTimer.COMPOSE_HTML_TO_SPAN, true,
55                "compose", "html_to_span", null);
56        return spanned;
57    }
58
59    /**
60     * Class that handles converting the html into a Spanned.
61     * This class will only handle a subset of the html tags. Below is the full list:
62     *   - bold
63     *   - italic
64     *   - underline
65     *   - font size
66     *   - font color
67     *   - font face
68     *   - a
69     *   - blockquote
70     *   - p
71     *   - div
72     */
73    public static class SpannedConverter implements HtmlTree.Converter<Spanned> {
74        // Pinto normal text size is 2 while normal for AbsoluteSizeSpan is 12.
75        // So 6 seems to be the magic number here. Html.toHtml also uses 6 as divider.
76        private static final int WEB_TO_ANDROID_SIZE_MULTIPLIER = 6;
77
78        protected final SpannableStringBuilder mBuilder = new SpannableStringBuilder();
79        private final LinkedList<TagWrapper> mSeenTags = Lists.newLinkedList();
80
81        // [copied verbatim from private version in HtmlTree.java]
82        //
83        // White space characters that are collapsed as a single space.
84        // Note that characters such as the non-breaking whitespace
85        // and full-width spaces are not equivalent to the normal spaces.
86        private static final String HTML_SPACE_EQUIVALENTS = " \n\r\t\f";
87
88        @Override
89        public void addNode(HtmlDocument.Node n, int nodeNum, int endNum) {
90            if (n instanceof HtmlDocument.Text) {
91                // If it's just string, let's append it
92                // FIXME: implement proper space/newline/<pre> handling like
93                // HtmlTree.PlainTextPrinter has.
94                final String text = ((HtmlDocument.Text) n).getText();
95                appendNormalText(text);
96            } else if (n instanceof HtmlDocument.Tag) {
97                handleStart((HtmlDocument.Tag) n);
98            } else if (n instanceof HtmlDocument.EndTag) {
99                handleEnd((HtmlDocument.EndTag) n);
100            }
101        }
102
103        /**
104         * Helper function to handle start tag
105         */
106        protected void handleStart(HtmlDocument.Tag tag) {
107            // Special case these tags since they only affect the number of newlines
108            HTML.Element element = tag.getElement();
109            if (HTML4.BR_ELEMENT.equals(element)) {
110                mBuilder.append("\n");
111            } else if (HTML4.P_ELEMENT.equals(element)) {
112                if (mBuilder.length() > 0) {
113                    // Paragraphs must have 2 new lines before itself (to "fake" margin)
114                    appendTwoNewLinesIfApplicable();
115                }
116            } else if (HTML4.DIV_ELEMENT.equals(element)) {
117                if (mBuilder.length() > 0) {
118                    // div should be on a newline
119                    appendOneNewLineIfApplicable();
120                }
121            }
122
123            if (!tag.isSelfTerminating()) {
124                // Add to the stack of tags needing closing tag
125                mSeenTags.push(new TagWrapper(tag, mBuilder.length()));
126            }
127        }
128
129        /**
130         * Helper function to handle end tag
131         */
132        protected void handleEnd(HtmlDocument.EndTag tag) {
133            TagWrapper lastSeen;
134            HTML.Element element = tag.getElement();
135            while ((lastSeen = mSeenTags.poll()) != null && lastSeen.tag.getElement() != null &&
136                    !lastSeen.tag.getElement().equals(element)) { }
137
138            // Misformatted html, just ignore this tag
139            if (lastSeen == null) {
140                return;
141            }
142
143            final Object marker;
144            if (HTML4.B_ELEMENT.equals(element)) {
145                // BOLD
146                marker = new StyleSpan(Typeface.BOLD);
147            } else if (HTML4.I_ELEMENT.equals(element)) {
148                // ITALIC
149                marker = new StyleSpan(Typeface.ITALIC);
150            } else if (HTML4.U_ELEMENT.equals(element)) {
151                // UNDERLINE
152                marker = new UnderlineSpan();
153            } else if (HTML4.A_ELEMENT.equals(element)) {
154                // A HREF
155                HtmlDocument.TagAttribute attr = lastSeen.tag.getAttribute(HTML4.HREF_ATTRIBUTE);
156                // Ignore this tag if it doesn't have a link
157                if (attr == null) {
158                    return;
159                }
160                marker = new URLSpan(attr.getValue());
161            } else if (HTML4.BLOCKQUOTE_ELEMENT.equals(element)) {
162                // BLOCKQUOTE
163                marker = new QuoteSpan();
164            } else if (HTML4.FONT_ELEMENT.equals(element)) {
165                // FONT SIZE/COLOR/FACE, since this can insert more than one span
166                // we special case it and return
167                handleFont(lastSeen);
168                return;
169            } else {
170                // These tags do not add new Spanned into the mBuilder
171                if (HTML4.P_ELEMENT.equals(element)) {
172                    // paragraphs should add 2 newlines after itself.
173                    // TODO (bug): currently always append 2 new lines at end of text because the
174                    // body is wrapped in a <p> tag. We should only append if there are more texts
175                    // after.
176                    appendTwoNewLinesIfApplicable();
177                } else if (HTML4.DIV_ELEMENT.equals(element)) {
178                    // div should add a newline before itself if it's not a newline
179                    appendOneNewLineIfApplicable();
180                }
181
182                return;
183            }
184
185            final int start = lastSeen.startIndex;
186            final int end = mBuilder.length();
187            if (start != end) {
188                mBuilder.setSpan(marker, start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
189            }
190        }
191
192        /**
193         * Helper function to handle end font tags
194         */
195        private void handleFont(TagWrapper wrapper) {
196            final int start = wrapper.startIndex;
197            final int end = mBuilder.length();
198
199            // check font color
200            HtmlDocument.TagAttribute attr = wrapper.tag.getAttribute(HTML4.COLOR_ATTRIBUTE);
201            if (attr != null) {
202                int c = Color.parseColor(attr.getValue());
203                if (c != -1) {
204                    mBuilder.setSpan(new ForegroundColorSpan(c | 0xFF000000), start, end,
205                            Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
206                }
207            }
208
209            // check font size
210            attr = wrapper.tag.getAttribute(HTML4.SIZE_ATTRIBUTE);
211            if (attr != null) {
212                int i = Integer.parseInt(attr.getValue());
213                if (i != -1) {
214                    mBuilder.setSpan(new AbsoluteSizeSpan(i * WEB_TO_ANDROID_SIZE_MULTIPLIER,
215                            true /* use dip */), start, end,Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
216                }
217            }
218
219            // check font typeface
220            attr = wrapper.tag.getAttribute(HTML4.FACE_ATTRIBUTE);
221            if (attr != null) {
222                String[] families = attr.getValue().split(",");
223                for (String family : families) {
224                    mBuilder.setSpan(new TypefaceSpan(family.trim()), start, end,
225                            Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
226                }
227            }
228        }
229
230        private void appendOneNewLineIfApplicable() {
231            if (mBuilder.length() == 0 || mBuilder.charAt(mBuilder.length() - 1) != '\n') {
232                mBuilder.append("\n");
233            }
234        }
235
236        private void appendTwoNewLinesIfApplicable() {
237            appendOneNewLineIfApplicable();
238            if (mBuilder.length() <= 1 || mBuilder.charAt(mBuilder.length() - 2) != '\n') {
239                mBuilder.append("\n");
240            }
241        }
242
243        @Override
244        public int getPlainTextLength() {
245            return mBuilder.length();
246        }
247
248        @Override
249        public Spanned getObject() {
250            return mBuilder;
251        }
252
253        protected void appendNormalText(String text) {
254            // adapted from HtmlTree.PlainTextPrinter#appendNormalText(String)
255
256            if (text.length() == 0) {
257                return;
258            }
259
260            // Strip beginning and ending whitespace.
261            text = CharMatcher.anyOf(HTML_SPACE_EQUIVALENTS).trimFrom(text);
262
263            // Collapse whitespace within the text.
264            text = CharMatcher.anyOf(HTML_SPACE_EQUIVALENTS).collapseFrom(text, ' ');
265
266            mBuilder.append(text);
267        }
268
269        private static class TagWrapper {
270            final HtmlDocument.Tag tag;
271            final int startIndex;
272
273            TagWrapper(HtmlDocument.Tag tag, int startIndex) {
274                this.tag = tag;
275                this.startIndex = startIndex;
276            }
277        }
278    }
279}
280