1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.text;
18
19import android.app.ActivityThread;
20import android.app.Application;
21import android.content.res.Resources;
22import android.graphics.Color;
23import android.graphics.Typeface;
24import android.graphics.drawable.Drawable;
25import android.text.style.AbsoluteSizeSpan;
26import android.text.style.AlignmentSpan;
27import android.text.style.BackgroundColorSpan;
28import android.text.style.BulletSpan;
29import android.text.style.CharacterStyle;
30import android.text.style.ForegroundColorSpan;
31import android.text.style.ImageSpan;
32import android.text.style.ParagraphStyle;
33import android.text.style.QuoteSpan;
34import android.text.style.RelativeSizeSpan;
35import android.text.style.StrikethroughSpan;
36import android.text.style.StyleSpan;
37import android.text.style.SubscriptSpan;
38import android.text.style.SuperscriptSpan;
39import android.text.style.TypefaceSpan;
40import android.text.style.URLSpan;
41import android.text.style.UnderlineSpan;
42
43import org.ccil.cowan.tagsoup.HTMLSchema;
44import org.ccil.cowan.tagsoup.Parser;
45import org.xml.sax.Attributes;
46import org.xml.sax.ContentHandler;
47import org.xml.sax.InputSource;
48import org.xml.sax.Locator;
49import org.xml.sax.SAXException;
50import org.xml.sax.XMLReader;
51
52import java.io.IOException;
53import java.io.StringReader;
54import java.util.HashMap;
55import java.util.Locale;
56import java.util.Map;
57import java.util.regex.Matcher;
58import java.util.regex.Pattern;
59
60/**
61 * This class processes HTML strings into displayable styled text.
62 * Not all HTML tags are supported.
63 */
64public class Html {
65    /**
66     * Retrieves images for HTML <img> tags.
67     */
68    public static interface ImageGetter {
69        /**
70         * This method is called when the HTML parser encounters an
71         * &lt;img&gt; tag.  The <code>source</code> argument is the
72         * string from the "src" attribute; the return value should be
73         * a Drawable representation of the image or <code>null</code>
74         * for a generic replacement image.  Make sure you call
75         * setBounds() on your Drawable if it doesn't already have
76         * its bounds set.
77         */
78        public Drawable getDrawable(String source);
79    }
80
81    /**
82     * Is notified when HTML tags are encountered that the parser does
83     * not know how to interpret.
84     */
85    public static interface TagHandler {
86        /**
87         * This method will be called whenn the HTML parser encounters
88         * a tag that it does not know how to interpret.
89         */
90        public void handleTag(boolean opening, String tag,
91                                 Editable output, XMLReader xmlReader);
92    }
93
94    /**
95     * Option for {@link #toHtml(Spanned, int)}: Wrap consecutive lines of text delimited by '\n'
96     * inside &lt;p&gt; elements. {@link BulletSpan}s are ignored.
97     */
98    public static final int TO_HTML_PARAGRAPH_LINES_CONSECUTIVE = 0x00000000;
99
100    /**
101     * Option for {@link #toHtml(Spanned, int)}: Wrap each line of text delimited by '\n' inside a
102     * &lt;p&gt; or a &lt;li&gt; element. This allows {@link ParagraphStyle}s attached to be
103     * encoded as CSS styles within the corresponding &lt;p&gt; or &lt;li&gt; element.
104     */
105    public static final int TO_HTML_PARAGRAPH_LINES_INDIVIDUAL = 0x00000001;
106
107    /**
108     * Flag indicating that texts inside &lt;p&gt; elements will be separated from other texts with
109     * one newline character by default.
110     */
111    public static final int FROM_HTML_SEPARATOR_LINE_BREAK_PARAGRAPH = 0x00000001;
112
113    /**
114     * Flag indicating that texts inside &lt;h1&gt;~&lt;h6&gt; elements will be separated from
115     * other texts with one newline character by default.
116     */
117    public static final int FROM_HTML_SEPARATOR_LINE_BREAK_HEADING = 0x00000002;
118
119    /**
120     * Flag indicating that texts inside &lt;li&gt; elements will be separated from other texts
121     * with one newline character by default.
122     */
123    public static final int FROM_HTML_SEPARATOR_LINE_BREAK_LIST_ITEM = 0x00000004;
124
125    /**
126     * Flag indicating that texts inside &lt;ul&gt; elements will be separated from other texts
127     * with one newline character by default.
128     */
129    public static final int FROM_HTML_SEPARATOR_LINE_BREAK_LIST = 0x00000008;
130
131    /**
132     * Flag indicating that texts inside &lt;div&gt; elements will be separated from other texts
133     * with one newline character by default.
134     */
135    public static final int FROM_HTML_SEPARATOR_LINE_BREAK_DIV = 0x00000010;
136
137    /**
138     * Flag indicating that texts inside &lt;blockquote&gt; elements will be separated from other
139     * texts with one newline character by default.
140     */
141    public static final int FROM_HTML_SEPARATOR_LINE_BREAK_BLOCKQUOTE = 0x00000020;
142
143    /**
144     * Flag indicating that CSS color values should be used instead of those defined in
145     * {@link Color}.
146     */
147    public static final int FROM_HTML_OPTION_USE_CSS_COLORS = 0x00000100;
148
149    /**
150     * Flags for {@link #fromHtml(String, int, ImageGetter, TagHandler)}: Separate block-level
151     * elements with blank lines (two newline characters) in between. This is the legacy behavior
152     * prior to N.
153     */
154    public static final int FROM_HTML_MODE_LEGACY = 0x00000000;
155
156    /**
157     * Flags for {@link #fromHtml(String, int, ImageGetter, TagHandler)}: Separate block-level
158     * elements with line breaks (single newline character) in between. This inverts the
159     * {@link Spanned} to HTML string conversion done with the option
160     * {@link #TO_HTML_PARAGRAPH_LINES_INDIVIDUAL}.
161     */
162    public static final int FROM_HTML_MODE_COMPACT =
163            FROM_HTML_SEPARATOR_LINE_BREAK_PARAGRAPH
164            | FROM_HTML_SEPARATOR_LINE_BREAK_HEADING
165            | FROM_HTML_SEPARATOR_LINE_BREAK_LIST_ITEM
166            | FROM_HTML_SEPARATOR_LINE_BREAK_LIST
167            | FROM_HTML_SEPARATOR_LINE_BREAK_DIV
168            | FROM_HTML_SEPARATOR_LINE_BREAK_BLOCKQUOTE;
169
170    /**
171     * The bit which indicates if lines delimited by '\n' will be grouped into &lt;p&gt; elements.
172     */
173    private static final int TO_HTML_PARAGRAPH_FLAG = 0x00000001;
174
175    private Html() { }
176
177    /**
178     * Returns displayable styled text from the provided HTML string with the legacy flags
179     * {@link #FROM_HTML_MODE_LEGACY}.
180     *
181     * @deprecated use {@link #fromHtml(String, int)} instead.
182     */
183    @Deprecated
184    public static Spanned fromHtml(String source) {
185        return fromHtml(source, FROM_HTML_MODE_LEGACY, null, null);
186    }
187
188    /**
189     * Returns displayable styled text from the provided HTML string. Any &lt;img&gt; tags in the
190     * HTML will display as a generic replacement image which your program can then go through and
191     * replace with real images.
192     *
193     * <p>This uses TagSoup to handle real HTML, including all of the brokenness found in the wild.
194     */
195    public static Spanned fromHtml(String source, int flags) {
196        return fromHtml(source, flags, null, null);
197    }
198
199    /**
200     * Lazy initialization holder for HTML parser. This class will
201     * a) be preloaded by the zygote, or b) not loaded until absolutely
202     * necessary.
203     */
204    private static class HtmlParser {
205        private static final HTMLSchema schema = new HTMLSchema();
206    }
207
208    /**
209     * Returns displayable styled text from the provided HTML string with the legacy flags
210     * {@link #FROM_HTML_MODE_LEGACY}.
211     *
212     * @deprecated use {@link #fromHtml(String, int, ImageGetter, TagHandler)} instead.
213     */
214    @Deprecated
215    public static Spanned fromHtml(String source, ImageGetter imageGetter, TagHandler tagHandler) {
216        return fromHtml(source, FROM_HTML_MODE_LEGACY, imageGetter, tagHandler);
217    }
218
219    /**
220     * Returns displayable styled text from the provided HTML string. Any &lt;img&gt; tags in the
221     * HTML will use the specified ImageGetter to request a representation of the image (use null
222     * if you don't want this) and the specified TagHandler to handle unknown tags (specify null if
223     * you don't want this).
224     *
225     * <p>This uses TagSoup to handle real HTML, including all of the brokenness found in the wild.
226     */
227    public static Spanned fromHtml(String source, int flags, ImageGetter imageGetter,
228            TagHandler tagHandler) {
229        Parser parser = new Parser();
230        try {
231            parser.setProperty(Parser.schemaProperty, HtmlParser.schema);
232        } catch (org.xml.sax.SAXNotRecognizedException e) {
233            // Should not happen.
234            throw new RuntimeException(e);
235        } catch (org.xml.sax.SAXNotSupportedException e) {
236            // Should not happen.
237            throw new RuntimeException(e);
238        }
239
240        HtmlToSpannedConverter converter =
241                new HtmlToSpannedConverter(source, imageGetter, tagHandler, parser, flags);
242        return converter.convert();
243    }
244
245    /**
246     * @deprecated use {@link #toHtml(Spanned, int)} instead.
247     */
248    @Deprecated
249    public static String toHtml(Spanned text) {
250        return toHtml(text, TO_HTML_PARAGRAPH_LINES_CONSECUTIVE);
251    }
252
253    /**
254     * Returns an HTML representation of the provided Spanned text. A best effort is
255     * made to add HTML tags corresponding to spans. Also note that HTML metacharacters
256     * (such as "&lt;" and "&amp;") within the input text are escaped.
257     *
258     * @param text input text to convert
259     * @param option one of {@link #TO_HTML_PARAGRAPH_LINES_CONSECUTIVE} or
260     *     {@link #TO_HTML_PARAGRAPH_LINES_INDIVIDUAL}
261     * @return string containing input converted to HTML
262     */
263    public static String toHtml(Spanned text, int option) {
264        StringBuilder out = new StringBuilder();
265        withinHtml(out, text, option);
266        return out.toString();
267    }
268
269    /**
270     * Returns an HTML escaped representation of the given plain text.
271     */
272    public static String escapeHtml(CharSequence text) {
273        StringBuilder out = new StringBuilder();
274        withinStyle(out, text, 0, text.length());
275        return out.toString();
276    }
277
278    private static void withinHtml(StringBuilder out, Spanned text, int option) {
279        if ((option & TO_HTML_PARAGRAPH_FLAG) == TO_HTML_PARAGRAPH_LINES_CONSECUTIVE) {
280            encodeTextAlignmentByDiv(out, text, option);
281            return;
282        }
283
284        withinDiv(out, text, 0, text.length(), option);
285    }
286
287    private static void encodeTextAlignmentByDiv(StringBuilder out, Spanned text, int option) {
288        int len = text.length();
289
290        int next;
291        for (int i = 0; i < len; i = next) {
292            next = text.nextSpanTransition(i, len, ParagraphStyle.class);
293            ParagraphStyle[] style = text.getSpans(i, next, ParagraphStyle.class);
294            String elements = " ";
295            boolean needDiv = false;
296
297            for(int j = 0; j < style.length; j++) {
298                if (style[j] instanceof AlignmentSpan) {
299                    Layout.Alignment align =
300                        ((AlignmentSpan) style[j]).getAlignment();
301                    needDiv = true;
302                    if (align == Layout.Alignment.ALIGN_CENTER) {
303                        elements = "align=\"center\" " + elements;
304                    } else if (align == Layout.Alignment.ALIGN_OPPOSITE) {
305                        elements = "align=\"right\" " + elements;
306                    } else {
307                        elements = "align=\"left\" " + elements;
308                    }
309                }
310            }
311            if (needDiv) {
312                out.append("<div ").append(elements).append(">");
313            }
314
315            withinDiv(out, text, i, next, option);
316
317            if (needDiv) {
318                out.append("</div>");
319            }
320        }
321    }
322
323    private static void withinDiv(StringBuilder out, Spanned text, int start, int end,
324            int option) {
325        int next;
326        for (int i = start; i < end; i = next) {
327            next = text.nextSpanTransition(i, end, QuoteSpan.class);
328            QuoteSpan[] quotes = text.getSpans(i, next, QuoteSpan.class);
329
330            for (QuoteSpan quote : quotes) {
331                out.append("<blockquote>");
332            }
333
334            withinBlockquote(out, text, i, next, option);
335
336            for (QuoteSpan quote : quotes) {
337                out.append("</blockquote>\n");
338            }
339        }
340    }
341
342    private static String getTextDirection(Spanned text, int start, int end) {
343        if (TextDirectionHeuristics.FIRSTSTRONG_LTR.isRtl(text, start, end - start)) {
344            return " dir=\"rtl\"";
345        } else {
346            return " dir=\"ltr\"";
347        }
348    }
349
350    private static String getTextStyles(Spanned text, int start, int end,
351            boolean forceNoVerticalMargin, boolean includeTextAlign) {
352        String margin = null;
353        String textAlign = null;
354
355        if (forceNoVerticalMargin) {
356            margin = "margin-top:0; margin-bottom:0;";
357        }
358        if (includeTextAlign) {
359            final AlignmentSpan[] alignmentSpans = text.getSpans(start, end, AlignmentSpan.class);
360
361            // Only use the last AlignmentSpan with flag SPAN_PARAGRAPH
362            for (int i = alignmentSpans.length - 1; i >= 0; i--) {
363                AlignmentSpan s = alignmentSpans[i];
364                if ((text.getSpanFlags(s) & Spanned.SPAN_PARAGRAPH) == Spanned.SPAN_PARAGRAPH) {
365                    final Layout.Alignment alignment = s.getAlignment();
366                    if (alignment == Layout.Alignment.ALIGN_NORMAL) {
367                        textAlign = "text-align:start;";
368                    } else if (alignment == Layout.Alignment.ALIGN_CENTER) {
369                        textAlign = "text-align:center;";
370                    } else if (alignment == Layout.Alignment.ALIGN_OPPOSITE) {
371                        textAlign = "text-align:end;";
372                    }
373                    break;
374                }
375            }
376        }
377
378        if (margin == null && textAlign == null) {
379            return "";
380        }
381
382        final StringBuilder style = new StringBuilder(" style=\"");
383        if (margin != null && textAlign != null) {
384            style.append(margin).append(" ").append(textAlign);
385        } else if (margin != null) {
386            style.append(margin);
387        } else if (textAlign != null) {
388            style.append(textAlign);
389        }
390
391        return style.append("\"").toString();
392    }
393
394    private static void withinBlockquote(StringBuilder out, Spanned text, int start, int end,
395            int option) {
396        if ((option & TO_HTML_PARAGRAPH_FLAG) == TO_HTML_PARAGRAPH_LINES_CONSECUTIVE) {
397            withinBlockquoteConsecutive(out, text, start, end);
398        } else {
399            withinBlockquoteIndividual(out, text, start, end);
400        }
401    }
402
403    private static void withinBlockquoteIndividual(StringBuilder out, Spanned text, int start,
404            int end) {
405        boolean isInList = false;
406        int next;
407        for (int i = start; i <= end; i = next) {
408            next = TextUtils.indexOf(text, '\n', i, end);
409            if (next < 0) {
410                next = end;
411            }
412
413            if (next == i) {
414                if (isInList) {
415                    // Current paragraph is no longer a list item; close the previously opened list
416                    isInList = false;
417                    out.append("</ul>\n");
418                }
419                out.append("<br>\n");
420            } else {
421                boolean isListItem = false;
422                ParagraphStyle[] paragraphStyles = text.getSpans(i, next, ParagraphStyle.class);
423                for (ParagraphStyle paragraphStyle : paragraphStyles) {
424                    final int spanFlags = text.getSpanFlags(paragraphStyle);
425                    if ((spanFlags & Spanned.SPAN_PARAGRAPH) == Spanned.SPAN_PARAGRAPH
426                            && paragraphStyle instanceof BulletSpan) {
427                        isListItem = true;
428                        break;
429                    }
430                }
431
432                if (isListItem && !isInList) {
433                    // Current paragraph is the first item in a list
434                    isInList = true;
435                    out.append("<ul")
436                            .append(getTextStyles(text, i, next, true, false))
437                            .append(">\n");
438                }
439
440                if (isInList && !isListItem) {
441                    // Current paragraph is no longer a list item; close the previously opened list
442                    isInList = false;
443                    out.append("</ul>\n");
444                }
445
446                String tagType = isListItem ? "li" : "p";
447                out.append("<").append(tagType)
448                        .append(getTextDirection(text, i, next))
449                        .append(getTextStyles(text, i, next, !isListItem, true))
450                        .append(">");
451
452                withinParagraph(out, text, i, next);
453
454                out.append("</");
455                out.append(tagType);
456                out.append(">\n");
457
458                if (next == end && isInList) {
459                    isInList = false;
460                    out.append("</ul>\n");
461                }
462            }
463
464            next++;
465        }
466    }
467
468    private static void withinBlockquoteConsecutive(StringBuilder out, Spanned text, int start,
469            int end) {
470        out.append("<p").append(getTextDirection(text, start, end)).append(">");
471
472        int next;
473        for (int i = start; i < end; i = next) {
474            next = TextUtils.indexOf(text, '\n', i, end);
475            if (next < 0) {
476                next = end;
477            }
478
479            int nl = 0;
480
481            while (next < end && text.charAt(next) == '\n') {
482                nl++;
483                next++;
484            }
485
486            withinParagraph(out, text, i, next - nl);
487
488            if (nl == 1) {
489                out.append("<br>\n");
490            } else {
491                for (int j = 2; j < nl; j++) {
492                    out.append("<br>");
493                }
494                if (next != end) {
495                    /* Paragraph should be closed and reopened */
496                    out.append("</p>\n");
497                    out.append("<p").append(getTextDirection(text, start, end)).append(">");
498                }
499            }
500        }
501
502        out.append("</p>\n");
503    }
504
505    private static void withinParagraph(StringBuilder out, Spanned text, int start, int end) {
506        int next;
507        for (int i = start; i < end; i = next) {
508            next = text.nextSpanTransition(i, end, CharacterStyle.class);
509            CharacterStyle[] style = text.getSpans(i, next, CharacterStyle.class);
510
511            for (int j = 0; j < style.length; j++) {
512                if (style[j] instanceof StyleSpan) {
513                    int s = ((StyleSpan) style[j]).getStyle();
514
515                    if ((s & Typeface.BOLD) != 0) {
516                        out.append("<b>");
517                    }
518                    if ((s & Typeface.ITALIC) != 0) {
519                        out.append("<i>");
520                    }
521                }
522                if (style[j] instanceof TypefaceSpan) {
523                    String s = ((TypefaceSpan) style[j]).getFamily();
524
525                    if ("monospace".equals(s)) {
526                        out.append("<tt>");
527                    }
528                }
529                if (style[j] instanceof SuperscriptSpan) {
530                    out.append("<sup>");
531                }
532                if (style[j] instanceof SubscriptSpan) {
533                    out.append("<sub>");
534                }
535                if (style[j] instanceof UnderlineSpan) {
536                    out.append("<u>");
537                }
538                if (style[j] instanceof StrikethroughSpan) {
539                    out.append("<span style=\"text-decoration:line-through;\">");
540                }
541                if (style[j] instanceof URLSpan) {
542                    out.append("<a href=\"");
543                    out.append(((URLSpan) style[j]).getURL());
544                    out.append("\">");
545                }
546                if (style[j] instanceof ImageSpan) {
547                    out.append("<img src=\"");
548                    out.append(((ImageSpan) style[j]).getSource());
549                    out.append("\">");
550
551                    // Don't output the dummy character underlying the image.
552                    i = next;
553                }
554                if (style[j] instanceof AbsoluteSizeSpan) {
555                    AbsoluteSizeSpan s = ((AbsoluteSizeSpan) style[j]);
556                    float sizeDip = s.getSize();
557                    if (!s.getDip()) {
558                        Application application = ActivityThread.currentApplication();
559                        sizeDip /= application.getResources().getDisplayMetrics().density;
560                    }
561
562                    // px in CSS is the equivalance of dip in Android
563                    out.append(String.format("<span style=\"font-size:%.0fpx\";>", sizeDip));
564                }
565                if (style[j] instanceof RelativeSizeSpan) {
566                    float sizeEm = ((RelativeSizeSpan) style[j]).getSizeChange();
567                    out.append(String.format("<span style=\"font-size:%.2fem;\">", sizeEm));
568                }
569                if (style[j] instanceof ForegroundColorSpan) {
570                    int color = ((ForegroundColorSpan) style[j]).getForegroundColor();
571                    out.append(String.format("<span style=\"color:#%06X;\">", 0xFFFFFF & color));
572                }
573                if (style[j] instanceof BackgroundColorSpan) {
574                    int color = ((BackgroundColorSpan) style[j]).getBackgroundColor();
575                    out.append(String.format("<span style=\"background-color:#%06X;\">",
576                            0xFFFFFF & color));
577                }
578            }
579
580            withinStyle(out, text, i, next);
581
582            for (int j = style.length - 1; j >= 0; j--) {
583                if (style[j] instanceof BackgroundColorSpan) {
584                    out.append("</span>");
585                }
586                if (style[j] instanceof ForegroundColorSpan) {
587                    out.append("</span>");
588                }
589                if (style[j] instanceof RelativeSizeSpan) {
590                    out.append("</span>");
591                }
592                if (style[j] instanceof AbsoluteSizeSpan) {
593                    out.append("</span>");
594                }
595                if (style[j] instanceof URLSpan) {
596                    out.append("</a>");
597                }
598                if (style[j] instanceof StrikethroughSpan) {
599                    out.append("</span>");
600                }
601                if (style[j] instanceof UnderlineSpan) {
602                    out.append("</u>");
603                }
604                if (style[j] instanceof SubscriptSpan) {
605                    out.append("</sub>");
606                }
607                if (style[j] instanceof SuperscriptSpan) {
608                    out.append("</sup>");
609                }
610                if (style[j] instanceof TypefaceSpan) {
611                    String s = ((TypefaceSpan) style[j]).getFamily();
612
613                    if (s.equals("monospace")) {
614                        out.append("</tt>");
615                    }
616                }
617                if (style[j] instanceof StyleSpan) {
618                    int s = ((StyleSpan) style[j]).getStyle();
619
620                    if ((s & Typeface.BOLD) != 0) {
621                        out.append("</b>");
622                    }
623                    if ((s & Typeface.ITALIC) != 0) {
624                        out.append("</i>");
625                    }
626                }
627            }
628        }
629    }
630
631    private static void withinStyle(StringBuilder out, CharSequence text,
632                                    int start, int end) {
633        for (int i = start; i < end; i++) {
634            char c = text.charAt(i);
635
636            if (c == '<') {
637                out.append("&lt;");
638            } else if (c == '>') {
639                out.append("&gt;");
640            } else if (c == '&') {
641                out.append("&amp;");
642            } else if (c >= 0xD800 && c <= 0xDFFF) {
643                if (c < 0xDC00 && i + 1 < end) {
644                    char d = text.charAt(i + 1);
645                    if (d >= 0xDC00 && d <= 0xDFFF) {
646                        i++;
647                        int codepoint = 0x010000 | (int) c - 0xD800 << 10 | (int) d - 0xDC00;
648                        out.append("&#").append(codepoint).append(";");
649                    }
650                }
651            } else if (c > 0x7E || c < ' ') {
652                out.append("&#").append((int) c).append(";");
653            } else if (c == ' ') {
654                while (i + 1 < end && text.charAt(i + 1) == ' ') {
655                    out.append("&nbsp;");
656                    i++;
657                }
658
659                out.append(' ');
660            } else {
661                out.append(c);
662            }
663        }
664    }
665}
666
667class HtmlToSpannedConverter implements ContentHandler {
668
669    private static final float[] HEADING_SIZES = {
670        1.5f, 1.4f, 1.3f, 1.2f, 1.1f, 1f,
671    };
672
673    private String mSource;
674    private XMLReader mReader;
675    private SpannableStringBuilder mSpannableStringBuilder;
676    private Html.ImageGetter mImageGetter;
677    private Html.TagHandler mTagHandler;
678    private int mFlags;
679
680    private static Pattern sTextAlignPattern;
681    private static Pattern sForegroundColorPattern;
682    private static Pattern sBackgroundColorPattern;
683    private static Pattern sTextDecorationPattern;
684
685    /**
686     * Name-value mapping of HTML/CSS colors which have different values in {@link Color}.
687     */
688    private static final Map<String, Integer> sColorMap;
689
690    static {
691        sColorMap = new HashMap<>();
692        sColorMap.put("darkgray", 0xFFA9A9A9);
693        sColorMap.put("gray", 0xFF808080);
694        sColorMap.put("lightgray", 0xFFD3D3D3);
695        sColorMap.put("darkgrey", 0xFFA9A9A9);
696        sColorMap.put("grey", 0xFF808080);
697        sColorMap.put("lightgrey", 0xFFD3D3D3);
698        sColorMap.put("green", 0xFF008000);
699    }
700
701    private static Pattern getTextAlignPattern() {
702        if (sTextAlignPattern == null) {
703            sTextAlignPattern = Pattern.compile("(?:\\s+|\\A)text-align\\s*:\\s*(\\S*)\\b");
704        }
705        return sTextAlignPattern;
706    }
707
708    private static Pattern getForegroundColorPattern() {
709        if (sForegroundColorPattern == null) {
710            sForegroundColorPattern = Pattern.compile(
711                    "(?:\\s+|\\A)color\\s*:\\s*(\\S*)\\b");
712        }
713        return sForegroundColorPattern;
714    }
715
716    private static Pattern getBackgroundColorPattern() {
717        if (sBackgroundColorPattern == null) {
718            sBackgroundColorPattern = Pattern.compile(
719                    "(?:\\s+|\\A)background(?:-color)?\\s*:\\s*(\\S*)\\b");
720        }
721        return sBackgroundColorPattern;
722    }
723
724    private static Pattern getTextDecorationPattern() {
725        if (sTextDecorationPattern == null) {
726            sTextDecorationPattern = Pattern.compile(
727                    "(?:\\s+|\\A)text-decoration\\s*:\\s*(\\S*)\\b");
728        }
729        return sTextDecorationPattern;
730    }
731
732    public HtmlToSpannedConverter( String source, Html.ImageGetter imageGetter,
733            Html.TagHandler tagHandler, Parser parser, int flags) {
734        mSource = source;
735        mSpannableStringBuilder = new SpannableStringBuilder();
736        mImageGetter = imageGetter;
737        mTagHandler = tagHandler;
738        mReader = parser;
739        mFlags = flags;
740    }
741
742    public Spanned convert() {
743
744        mReader.setContentHandler(this);
745        try {
746            mReader.parse(new InputSource(new StringReader(mSource)));
747        } catch (IOException e) {
748            // We are reading from a string. There should not be IO problems.
749            throw new RuntimeException(e);
750        } catch (SAXException e) {
751            // TagSoup doesn't throw parse exceptions.
752            throw new RuntimeException(e);
753        }
754
755        // Fix flags and range for paragraph-type markup.
756        Object[] obj = mSpannableStringBuilder.getSpans(0, mSpannableStringBuilder.length(), ParagraphStyle.class);
757        for (int i = 0; i < obj.length; i++) {
758            int start = mSpannableStringBuilder.getSpanStart(obj[i]);
759            int end = mSpannableStringBuilder.getSpanEnd(obj[i]);
760
761            // If the last line of the range is blank, back off by one.
762            if (end - 2 >= 0) {
763                if (mSpannableStringBuilder.charAt(end - 1) == '\n' &&
764                    mSpannableStringBuilder.charAt(end - 2) == '\n') {
765                    end--;
766                }
767            }
768
769            if (end == start) {
770                mSpannableStringBuilder.removeSpan(obj[i]);
771            } else {
772                mSpannableStringBuilder.setSpan(obj[i], start, end, Spannable.SPAN_PARAGRAPH);
773            }
774        }
775
776        return mSpannableStringBuilder;
777    }
778
779    private void handleStartTag(String tag, Attributes attributes) {
780        if (tag.equalsIgnoreCase("br")) {
781            // We don't need to handle this. TagSoup will ensure that there's a </br> for each <br>
782            // so we can safely emit the linebreaks when we handle the close tag.
783        } else if (tag.equalsIgnoreCase("p")) {
784            startBlockElement(mSpannableStringBuilder, attributes, getMarginParagraph());
785            startCssStyle(mSpannableStringBuilder, attributes);
786        } else if (tag.equalsIgnoreCase("ul")) {
787            startBlockElement(mSpannableStringBuilder, attributes, getMarginList());
788        } else if (tag.equalsIgnoreCase("li")) {
789            startLi(mSpannableStringBuilder, attributes);
790        } else if (tag.equalsIgnoreCase("div")) {
791            startBlockElement(mSpannableStringBuilder, attributes, getMarginDiv());
792        } else if (tag.equalsIgnoreCase("span")) {
793            startCssStyle(mSpannableStringBuilder, attributes);
794        } else if (tag.equalsIgnoreCase("strong")) {
795            start(mSpannableStringBuilder, new Bold());
796        } else if (tag.equalsIgnoreCase("b")) {
797            start(mSpannableStringBuilder, new Bold());
798        } else if (tag.equalsIgnoreCase("em")) {
799            start(mSpannableStringBuilder, new Italic());
800        } else if (tag.equalsIgnoreCase("cite")) {
801            start(mSpannableStringBuilder, new Italic());
802        } else if (tag.equalsIgnoreCase("dfn")) {
803            start(mSpannableStringBuilder, new Italic());
804        } else if (tag.equalsIgnoreCase("i")) {
805            start(mSpannableStringBuilder, new Italic());
806        } else if (tag.equalsIgnoreCase("big")) {
807            start(mSpannableStringBuilder, new Big());
808        } else if (tag.equalsIgnoreCase("small")) {
809            start(mSpannableStringBuilder, new Small());
810        } else if (tag.equalsIgnoreCase("font")) {
811            startFont(mSpannableStringBuilder, attributes);
812        } else if (tag.equalsIgnoreCase("blockquote")) {
813            startBlockquote(mSpannableStringBuilder, attributes);
814        } else if (tag.equalsIgnoreCase("tt")) {
815            start(mSpannableStringBuilder, new Monospace());
816        } else if (tag.equalsIgnoreCase("a")) {
817            startA(mSpannableStringBuilder, attributes);
818        } else if (tag.equalsIgnoreCase("u")) {
819            start(mSpannableStringBuilder, new Underline());
820        } else if (tag.equalsIgnoreCase("del")) {
821            start(mSpannableStringBuilder, new Strikethrough());
822        } else if (tag.equalsIgnoreCase("s")) {
823            start(mSpannableStringBuilder, new Strikethrough());
824        } else if (tag.equalsIgnoreCase("strike")) {
825            start(mSpannableStringBuilder, new Strikethrough());
826        } else if (tag.equalsIgnoreCase("sup")) {
827            start(mSpannableStringBuilder, new Super());
828        } else if (tag.equalsIgnoreCase("sub")) {
829            start(mSpannableStringBuilder, new Sub());
830        } else if (tag.length() == 2 &&
831                Character.toLowerCase(tag.charAt(0)) == 'h' &&
832                tag.charAt(1) >= '1' && tag.charAt(1) <= '6') {
833            startHeading(mSpannableStringBuilder, attributes, tag.charAt(1) - '1');
834        } else if (tag.equalsIgnoreCase("img")) {
835            startImg(mSpannableStringBuilder, attributes, mImageGetter);
836        } else if (mTagHandler != null) {
837            mTagHandler.handleTag(true, tag, mSpannableStringBuilder, mReader);
838        }
839    }
840
841    private void handleEndTag(String tag) {
842        if (tag.equalsIgnoreCase("br")) {
843            handleBr(mSpannableStringBuilder);
844        } else if (tag.equalsIgnoreCase("p")) {
845            endCssStyle(mSpannableStringBuilder);
846            endBlockElement(mSpannableStringBuilder);
847        } else if (tag.equalsIgnoreCase("ul")) {
848            endBlockElement(mSpannableStringBuilder);
849        } else if (tag.equalsIgnoreCase("li")) {
850            endLi(mSpannableStringBuilder);
851        } else if (tag.equalsIgnoreCase("div")) {
852            endBlockElement(mSpannableStringBuilder);
853        } else if (tag.equalsIgnoreCase("span")) {
854            endCssStyle(mSpannableStringBuilder);
855        } else if (tag.equalsIgnoreCase("strong")) {
856            end(mSpannableStringBuilder, Bold.class, new StyleSpan(Typeface.BOLD));
857        } else if (tag.equalsIgnoreCase("b")) {
858            end(mSpannableStringBuilder, Bold.class, new StyleSpan(Typeface.BOLD));
859        } else if (tag.equalsIgnoreCase("em")) {
860            end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
861        } else if (tag.equalsIgnoreCase("cite")) {
862            end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
863        } else if (tag.equalsIgnoreCase("dfn")) {
864            end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
865        } else if (tag.equalsIgnoreCase("i")) {
866            end(mSpannableStringBuilder, Italic.class, new StyleSpan(Typeface.ITALIC));
867        } else if (tag.equalsIgnoreCase("big")) {
868            end(mSpannableStringBuilder, Big.class, new RelativeSizeSpan(1.25f));
869        } else if (tag.equalsIgnoreCase("small")) {
870            end(mSpannableStringBuilder, Small.class, new RelativeSizeSpan(0.8f));
871        } else if (tag.equalsIgnoreCase("font")) {
872            endFont(mSpannableStringBuilder);
873        } else if (tag.equalsIgnoreCase("blockquote")) {
874            endBlockquote(mSpannableStringBuilder);
875        } else if (tag.equalsIgnoreCase("tt")) {
876            end(mSpannableStringBuilder, Monospace.class, new TypefaceSpan("monospace"));
877        } else if (tag.equalsIgnoreCase("a")) {
878            endA(mSpannableStringBuilder);
879        } else if (tag.equalsIgnoreCase("u")) {
880            end(mSpannableStringBuilder, Underline.class, new UnderlineSpan());
881        } else if (tag.equalsIgnoreCase("del")) {
882            end(mSpannableStringBuilder, Strikethrough.class, new StrikethroughSpan());
883        } else if (tag.equalsIgnoreCase("s")) {
884            end(mSpannableStringBuilder, Strikethrough.class, new StrikethroughSpan());
885        } else if (tag.equalsIgnoreCase("strike")) {
886            end(mSpannableStringBuilder, Strikethrough.class, new StrikethroughSpan());
887        } else if (tag.equalsIgnoreCase("sup")) {
888            end(mSpannableStringBuilder, Super.class, new SuperscriptSpan());
889        } else if (tag.equalsIgnoreCase("sub")) {
890            end(mSpannableStringBuilder, Sub.class, new SubscriptSpan());
891        } else if (tag.length() == 2 &&
892                Character.toLowerCase(tag.charAt(0)) == 'h' &&
893                tag.charAt(1) >= '1' && tag.charAt(1) <= '6') {
894            endHeading(mSpannableStringBuilder);
895        } else if (mTagHandler != null) {
896            mTagHandler.handleTag(false, tag, mSpannableStringBuilder, mReader);
897        }
898    }
899
    /**
     * Returns the newline margin used for {@code <p>} elements, as computed by
     * {@link #getMargin(int)} for the paragraph separator option.
     */
    private int getMarginParagraph() {
        return getMargin(Html.FROM_HTML_SEPARATOR_LINE_BREAK_PARAGRAPH);
    }
903
    /**
     * Returns the newline margin used for heading elements, as computed by
     * {@link #getMargin(int)} for the heading separator option.
     */
    private int getMarginHeading() {
        return getMargin(Html.FROM_HTML_SEPARATOR_LINE_BREAK_HEADING);
    }
907
    /**
     * Returns the newline margin used for {@code <li>} elements, as computed by
     * {@link #getMargin(int)} for the list-item separator option.
     */
    private int getMarginListItem() {
        return getMargin(Html.FROM_HTML_SEPARATOR_LINE_BREAK_LIST_ITEM);
    }
911
    /**
     * Returns the newline margin used for {@code <ul>} elements, as computed by
     * {@link #getMargin(int)} for the list separator option.
     */
    private int getMarginList() {
        return getMargin(Html.FROM_HTML_SEPARATOR_LINE_BREAK_LIST);
    }
915
    /**
     * Returns the newline margin used for {@code <div>} elements, as computed by
     * {@link #getMargin(int)} for the div separator option.
     */
    private int getMarginDiv() {
        return getMargin(Html.FROM_HTML_SEPARATOR_LINE_BREAK_DIV);
    }
919
    /**
     * Returns the newline margin used for {@code <blockquote>} elements, as computed by
     * {@link #getMargin(int)} for the blockquote separator option.
     */
    private int getMarginBlockquote() {
        return getMargin(Html.FROM_HTML_SEPARATOR_LINE_BREAK_BLOCKQUOTE);
    }
923
924    /**
925     * Returns the minimum number of newline characters needed before and after a given block-level
926     * element.
927     *
928     * @param flag the corresponding option flag defined in {@link Html} of a block-level element
929     */
930    private int getMargin(int flag) {
931        if ((flag & mFlags) != 0) {
932            return 1;
933        }
934        return 2;
935    }
936
937    private static void appendNewlines(Editable text, int minNewline) {
938        final int len = text.length();
939
940        if (len == 0) {
941            return;
942        }
943
944        int existingNewlines = 0;
945        for (int i = len - 1; i >= 0 && text.charAt(i) == '\n'; i--) {
946            existingNewlines++;
947        }
948
949        for (int j = existingNewlines; j < minNewline; j++) {
950            text.append("\n");
951        }
952    }
953
954    private static void startBlockElement(Editable text, Attributes attributes, int margin) {
955        final int len = text.length();
956        if (margin > 0) {
957            appendNewlines(text, margin);
958            start(text, new Newline(margin));
959        }
960
961        String style = attributes.getValue("", "style");
962        if (style != null) {
963            Matcher m = getTextAlignPattern().matcher(style);
964            if (m.find()) {
965                String alignment = m.group(1);
966                if (alignment.equalsIgnoreCase("start")) {
967                    start(text, new Alignment(Layout.Alignment.ALIGN_NORMAL));
968                } else if (alignment.equalsIgnoreCase("center")) {
969                    start(text, new Alignment(Layout.Alignment.ALIGN_CENTER));
970                } else if (alignment.equalsIgnoreCase("end")) {
971                    start(text, new Alignment(Layout.Alignment.ALIGN_OPPOSITE));
972                }
973            }
974        }
975    }
976
977    private static void endBlockElement(Editable text) {
978        Newline n = getLast(text, Newline.class);
979        if (n != null) {
980            appendNewlines(text, n.mNumNewlines);
981            text.removeSpan(n);
982        }
983
984        Alignment a = getLast(text, Alignment.class);
985        if (a != null) {
986            setSpanFromMark(text, a, new AlignmentSpan.Standard(a.mAlignment));
987        }
988    }
989
    /**
     * Emits the line break for a {@code <br>} element by appending a single newline.
     *
     * @param text the text being built
     */
    private static void handleBr(Editable text) {
        text.append('\n');
    }
993
    /**
     * Opens a list item: starts a block element with the list-item margin, leaves a
     * {@link Bullet} mark, then records any CSS styling from the tag's attributes.
     *
     * @param text the text being built
     * @param attributes the attributes of the opening tag
     */
    private void startLi(Editable text, Attributes attributes) {
        startBlockElement(text, attributes, getMarginListItem());
        start(text, new Bullet());
        startCssStyle(text, attributes);
    }
999
    /**
     * Closes a list item, undoing {@code startLi} in reverse order. The bullet span is applied
     * after the block element is closed, so its range runs to the text length at that point,
     * including any newlines {@code endBlockElement} appended.
     *
     * @param text the text being built
     */
    private static void endLi(Editable text) {
        endCssStyle(text);
        endBlockElement(text);
        end(text, Bullet.class, new BulletSpan());
    }
1005
    /**
     * Opens a blockquote: starts a block element with the blockquote margin and leaves a
     * {@link Blockquote} mark at the current end of the text.
     *
     * @param text the text being built
     * @param attributes the attributes of the opening tag
     */
    private void startBlockquote(Editable text, Attributes attributes) {
        startBlockElement(text, attributes, getMarginBlockquote());
        start(text, new Blockquote());
    }
1010
    /**
     * Closes a blockquote: closes the block element first, then replaces the most recent
     * {@link Blockquote} mark with a {@link QuoteSpan} running to the current end of the text
     * (which includes any newlines {@code endBlockElement} appended).
     *
     * @param text the text being built
     */
    private static void endBlockquote(Editable text) {
        endBlockElement(text);
        end(text, Blockquote.class, new QuoteSpan());
    }
1015
    /**
     * Opens a heading: starts a block element with the heading margin and leaves a
     * {@link Heading} mark carrying the level.
     *
     * @param text the text being built
     * @param attributes the attributes of the opening tag
     * @param level the heading level; the caller in this class passes 0 for h1 through 5 for
     *        h6, and {@code endHeading} uses it as an index into {@code HEADING_SIZES}
     */
    private void startHeading(Editable text, Attributes attributes, int level) {
        startBlockElement(text, attributes, getMarginHeading());
        start(text, new Heading(level));
    }
1020
1021    private static void endHeading(Editable text) {
1022        // RelativeSizeSpan and StyleSpan are CharacterStyles
1023        // Their ranges should not include the newlines at the end
1024        Heading h = getLast(text, Heading.class);
1025        if (h != null) {
1026            setSpanFromMark(text, h, new RelativeSizeSpan(HEADING_SIZES[h.mLevel]),
1027                    new StyleSpan(Typeface.BOLD));
1028        }
1029
1030        endBlockElement(text);
1031    }
1032
1033    private static <T> T getLast(Spanned text, Class<T> kind) {
1034        /*
1035         * This knows that the last returned object from getSpans()
1036         * will be the most recently added.
1037         */
1038        T[] objs = text.getSpans(0, text.length(), kind);
1039
1040        if (objs.length == 0) {
1041            return null;
1042        } else {
1043            return objs[objs.length - 1];
1044        }
1045    }
1046
1047    private static void setSpanFromMark(Spannable text, Object mark, Object... spans) {
1048        int where = text.getSpanStart(mark);
1049        text.removeSpan(mark);
1050        int len = text.length();
1051        if (where != len) {
1052            for (Object span : spans) {
1053                text.setSpan(span, where, len, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
1054            }
1055        }
1056    }
1057
    /**
     * Attaches {@code mark} as a zero-length span at the current end of {@code text}. The
     * mark records where an element began so that a later {@code end}/{@code setSpanFromMark}
     * call can replace it with the real span.
     *
     * @param text the text being built
     * @param mark the placeholder object to attach
     */
    private static void start(Editable text, Object mark) {
        int len = text.length();
        text.setSpan(mark, len, len, Spannable.SPAN_INCLUSIVE_EXCLUSIVE);
    }
1062
1063    private static void end(Editable text, Class kind, Object repl) {
1064        int len = text.length();
1065        Object obj = getLast(text, kind);
1066        if (obj != null) {
1067            setSpanFromMark(text, obj, repl);
1068        }
1069    }
1070
1071    private void startCssStyle(Editable text, Attributes attributes) {
1072        String style = attributes.getValue("", "style");
1073        if (style != null) {
1074            Matcher m = getForegroundColorPattern().matcher(style);
1075            if (m.find()) {
1076                int c = getHtmlColor(m.group(1));
1077                if (c != -1) {
1078                    start(text, new Foreground(c | 0xFF000000));
1079                }
1080            }
1081
1082            m = getBackgroundColorPattern().matcher(style);
1083            if (m.find()) {
1084                int c = getHtmlColor(m.group(1));
1085                if (c != -1) {
1086                    start(text, new Background(c | 0xFF000000));
1087                }
1088            }
1089
1090            m = getTextDecorationPattern().matcher(style);
1091            if (m.find()) {
1092                String textDecoration = m.group(1);
1093                if (textDecoration.equalsIgnoreCase("line-through")) {
1094                    start(text, new Strikethrough());
1095                }
1096            }
1097        }
1098    }
1099
    /**
     * Resolves the CSS marks left by {@code startCssStyle}: the most recent
     * {@link Strikethrough}, {@link Background} and {@link Foreground} marks (each if present)
     * are replaced, in that order, by the corresponding spans running to the current end of
     * the text.
     *
     * NOTE(review): each lookup takes the most recently added mark of its type, whichever
     * element left it. An element that added no mark of a type may therefore consume one left
     * by an enclosing element -- confirm whether that is intended.
     *
     * @param text the text being built
     */
    private static void endCssStyle(Editable text) {
        Strikethrough s = getLast(text, Strikethrough.class);
        if (s != null) {
            setSpanFromMark(text, s, new StrikethroughSpan());
        }

        Background b = getLast(text, Background.class);
        if (b != null) {
            setSpanFromMark(text, b, new BackgroundColorSpan(b.mBackgroundColor));
        }

        Foreground f = getLast(text, Foreground.class);
        if (f != null) {
            setSpanFromMark(text, f, new ForegroundColorSpan(f.mForegroundColor));
        }
    }
1116
1117    private static void startImg(Editable text, Attributes attributes, Html.ImageGetter img) {
1118        String src = attributes.getValue("", "src");
1119        Drawable d = null;
1120
1121        if (img != null) {
1122            d = img.getDrawable(src);
1123        }
1124
1125        if (d == null) {
1126            d = Resources.getSystem().
1127                    getDrawable(com.android.internal.R.drawable.unknown_image);
1128            d.setBounds(0, 0, d.getIntrinsicWidth(), d.getIntrinsicHeight());
1129        }
1130
1131        int len = text.length();
1132        text.append("\uFFFC");
1133
1134        text.setSpan(new ImageSpan(d, src), len, text.length(),
1135                     Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
1136    }
1137
1138    private void startFont(Editable text, Attributes attributes) {
1139        String color = attributes.getValue("", "color");
1140        String face = attributes.getValue("", "face");
1141
1142        if (!TextUtils.isEmpty(color)) {
1143            int c = getHtmlColor(color);
1144            if (c != -1) {
1145                start(text, new Foreground(c | 0xFF000000));
1146            }
1147        }
1148
1149        if (!TextUtils.isEmpty(face)) {
1150            start(text, new Font(face));
1151        }
1152    }
1153
    /**
     * Handles a closing {@code </font>} tag: the most recent {@link Font} mark (if any)
     * becomes a {@link TypefaceSpan}, then the most recent {@link Foreground} mark (if any)
     * becomes a {@link ForegroundColorSpan}, each running to the current end of the text.
     *
     * NOTE(review): the Foreground lookup takes the most recent mark of that type, which is
     * also the type left by {@code startCssStyle} -- confirm the pairing is as intended when
     * the {@code <font>} tag itself carried no color.
     *
     * @param text the text being built
     */
    private static void endFont(Editable text) {
        Font font = getLast(text, Font.class);
        if (font != null) {
            setSpanFromMark(text, font, new TypefaceSpan(font.mFace));
        }

        Foreground foreground = getLast(text, Foreground.class);
        if (foreground != null) {
            setSpanFromMark(text, foreground,
                    new ForegroundColorSpan(foreground.mForegroundColor));
        }
    }
1166
    /**
     * Handles an opening {@code <a>} tag by leaving an {@link Href} mark holding the value of
     * the {@code href} attribute. A mark is left even when the lookup returns {@code null};
     * {@code endA} checks for that case.
     *
     * @param text the text being built
     * @param attributes the attributes of the opening tag
     */
    private static void startA(Editable text, Attributes attributes) {
        String href = attributes.getValue("", "href");
        start(text, new Href(href));
    }
1171
1172    private static void endA(Editable text) {
1173        Href h = getLast(text, Href.class);
1174        if (h != null) {
1175            if (h.mHref != null) {
1176                setSpanFromMark(text, h, new URLSpan((h.mHref)));
1177            }
1178        }
1179    }
1180
1181    private int getHtmlColor(String color) {
1182        if ((mFlags & Html.FROM_HTML_OPTION_USE_CSS_COLORS)
1183                == Html.FROM_HTML_OPTION_USE_CSS_COLORS) {
1184            Integer i = sColorMap.get(color.toLowerCase(Locale.US));
1185            if (i != null) {
1186                return i;
1187            }
1188        }
1189        return Color.getHtmlColor(color);
1190    }
1191
    /** No-op: the locator is not used by this converter. */
    public void setDocumentLocator(Locator locator) {
    }

    /** No-op: nothing is done at the start of the document. */
    public void startDocument() throws SAXException {
    }

    /** No-op: nothing is done at the end of the document. */
    public void endDocument() throws SAXException {
    }

    /** No-op: prefix mappings are ignored. */
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
    }

    /** No-op: prefix mappings are ignored. */
    public void endPrefixMapping(String prefix) throws SAXException {
    }
1206
    /**
     * Forwards an opening element to {@code handleStartTag} using its local name and
     * attributes; {@code uri} and {@code qName} are not used.
     */
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        handleStartTag(localName, attributes);
    }
1211
    /**
     * Forwards a closing element to {@code handleEndTag} using its local name; {@code uri}
     * and {@code qName} are not used.
     */
    public void endElement(String uri, String localName, String qName) throws SAXException {
        handleEndTag(localName);
    }
1215
1216    public void characters(char ch[], int start, int length) throws SAXException {
1217        StringBuilder sb = new StringBuilder();
1218
1219        /*
1220         * Ignore whitespace that immediately follows other whitespace;
1221         * newlines count as spaces.
1222         */
1223
1224        for (int i = 0; i < length; i++) {
1225            char c = ch[i + start];
1226
1227            if (c == ' ' || c == '\n') {
1228                char pred;
1229                int len = sb.length();
1230
1231                if (len == 0) {
1232                    len = mSpannableStringBuilder.length();
1233
1234                    if (len == 0) {
1235                        pred = '\n';
1236                    } else {
1237                        pred = mSpannableStringBuilder.charAt(len - 1);
1238                    }
1239                } else {
1240                    pred = sb.charAt(len - 1);
1241                }
1242
1243                if (pred != ' ' && pred != '\n') {
1244                    sb.append(' ');
1245                }
1246            } else {
1247                sb.append(c);
1248            }
1249        }
1250
1251        mSpannableStringBuilder.append(sb);
1252    }
1253
    /** No-op: ignorable whitespace is discarded. */
    public void ignorableWhitespace(char ch[], int start, int length) throws SAXException {
    }

    /** No-op: processing instructions are ignored. */
    public void processingInstruction(String target, String data) throws SAXException {
    }

    /** No-op: skipped entities are ignored. */
    public void skippedEntity(String name) throws SAXException {
    }
1262
    // Stateless marker types. An instance is attached to the text as a zero-length span when
    // the corresponding element opens, and is replaced by the real span when it closes.
    private static class Bold { }
    private static class Italic { }
    private static class Underline { }
    private static class Strikethrough { }
    private static class Big { }
    private static class Small { }
    private static class Monospace { }
    private static class Blockquote { }
    private static class Super { }
    private static class Sub { }
    private static class Bullet { }
1274
1275    private static class Font {
1276        public String mFace;
1277
1278        public Font(String face) {
1279            mFace = face;
1280        }
1281    }
1282
1283    private static class Href {
1284        public String mHref;
1285
1286        public Href(String href) {
1287            mHref = href;
1288        }
1289    }
1290
1291    private static class Foreground {
1292        private int mForegroundColor;
1293
1294        public Foreground(int foregroundColor) {
1295            mForegroundColor = foregroundColor;
1296        }
1297    }
1298
1299    private static class Background {
1300        private int mBackgroundColor;
1301
1302        public Background(int backgroundColor) {
1303            mBackgroundColor = backgroundColor;
1304        }
1305    }
1306
1307    private static class Heading {
1308        private int mLevel;
1309
1310        public Heading(int level) {
1311            mLevel = level;
1312        }
1313    }
1314
1315    private static class Newline {
1316        private int mNumNewlines;
1317
1318        public Newline(int numNewlines) {
1319            mNumNewlines = numNewlines;
1320        }
1321    }
1322
1323    private static class Alignment {
1324        private Layout.Alignment mAlignment;
1325
1326        public Alignment(Layout.Alignment alignment) {
1327            mAlignment = alignment;
1328        }
1329    }
1330}
1331