17922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein/* 27922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * Copyright (C) 2010 The Android Open Source Project 37922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * 47922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * Licensed under the Apache License, Version 2.0 (the "License"); 57922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * you may not use this file except in compliance with the License. 67922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * You may obtain a copy of the License at 77922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * 87922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * http://www.apache.org/licenses/LICENSE-2.0 97922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * 107922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * Unless required by applicable law or agreed to in writing, software 117922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * distributed under the License is distributed on an "AS IS" BASIS, 127922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 137922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * See the License for the specific language governing permissions and 147922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * limitations under the License. 
157922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein */ 167922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 177922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinpackage com.android.emailcommon.utility; 187922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 197922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport com.google.common.annotations.VisibleForTesting; 207922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 217922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport android.graphics.Color; 227922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport android.text.Spannable; 237922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport android.text.SpannableString; 247922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport android.text.SpannableStringBuilder; 257922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport android.text.TextUtils; 267922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport android.text.style.BackgroundColorSpan; 277922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 287922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport java.io.IOException; 297922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport java.util.ArrayList; 307922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport java.util.HashMap; 317922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport java.util.Map; 327922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinimport java.util.StringTokenizer; 337922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 347922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sappersteinpublic class TextUtilities { 357922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Highlight color is yellow, as in other apps. 367922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // TODO Push for this to be a global (style-related?) 
constant 377922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein public static final int HIGHLIGHT_COLOR_INT = Color.YELLOW; 387922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // We AND off the "alpha" from the color (i.e. 0xFFFFFF00 -> 0x00FFFF00) 397922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein /*package*/ static final String HIGHLIGHT_COLOR_STRING = 407922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein '#' + Integer.toHexString(HIGHLIGHT_COLOR_INT & 0x00FFFFFF); 417922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 427922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // This is how many chars we'll allow in a snippet 437922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein private static final int MAX_SNIPPET_LENGTH = 200; 447922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // For some reason, isWhitespace() returns false with the following... 457922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein /*package*/ static final char NON_BREAKING_SPACE_CHARACTER = (char)160; 467922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 477922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Tags whose content must be stripped as well 487922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein static final String[] STRIP_TAGS = 497922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein new String[] {"title", "script", "style", "applet", "head"}; 507922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // The number of characters we peel off for testing against STRIP_TAGS; this should be the 517922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // maximum size of the strings in STRIP_TAGS 527922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein static final int MAX_STRIP_TAG_LENGTH = 6; 537922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 547922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein static final Map<String, Character> ESCAPE_STRINGS; 
557922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein static { 567922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // HTML character entity references as defined in HTML 4 577922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // see http://www.w3.org/TR/REC-html40/sgml/entities.html 587922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS = new HashMap<String, Character>(252); 597922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 607922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put(" ", '\u00A0'); 617922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¡", '\u00A1'); 627922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¢", '\u00A2'); 637922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("£", '\u00A3'); 647922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¤", '\u00A4'); 657922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¥", '\u00A5'); 667922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¦", '\u00A6'); 677922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("§", '\u00A7'); 687922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¨", '\u00A8'); 697922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("©", '\u00A9'); 707922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ª", '\u00AA'); 717922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("«", '\u00AB'); 727922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¬", '\u00AC'); 737922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("­", '\u00AD'); 747922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("®", '\u00AE'); 757922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 
ESCAPE_STRINGS.put("¯", '\u00AF'); 767922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("°", '\u00B0'); 777922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("±", '\u00B1'); 787922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("²", '\u00B2'); 797922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("³", '\u00B3'); 807922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("´", '\u00B4'); 817922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("µ", '\u00B5'); 827922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¶", '\u00B6'); 837922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("·", '\u00B7'); 847922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¸", '\u00B8'); 857922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¹", '\u00B9'); 867922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("º", '\u00BA'); 877922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("»", '\u00BB'); 887922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¼", '\u00BC'); 897922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("½", '\u00BD'); 907922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¾", '\u00BE'); 917922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¿", '\u00BF'); 927922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("À", '\u00C0'); 937922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Á", '\u00C1'); 947922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Â", '\u00C2'); 957922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ã", '\u00C3'); 
967922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ä", '\u00C4'); 977922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Å", '\u00C5'); 987922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Æ", '\u00C6'); 997922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ç", '\u00C7'); 1007922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("È", '\u00C8'); 1017922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("É", '\u00C9'); 1027922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ê", '\u00CA'); 1037922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ë", '\u00CB'); 1047922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ì", '\u00CC'); 1057922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Í", '\u00CD'); 1067922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Î", '\u00CE'); 1077922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ï", '\u00CF'); 1087922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ð", '\u00D0'); 1097922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ñ", '\u00D1'); 1107922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ò", '\u00D2'); 1117922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ó", '\u00D3'); 1127922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ô", '\u00D4'); 1137922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Õ", '\u00D5'); 1147922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ö", '\u00D6'); 1157922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("×", '\u00D7'); 1167922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 
ESCAPE_STRINGS.put("Ø", '\u00D8'); 1177922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ù", '\u00D9'); 1187922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ú", '\u00DA'); 1197922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Û", '\u00DB'); 1207922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ü", '\u00DC'); 1217922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Ý", '\u00DD'); 1227922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("Þ", '\u00DE'); 1237922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ß", '\u00DF'); 1247922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("à", '\u00E0'); 1257922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("á", '\u00E1'); 1267922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("â", '\u00E2'); 1277922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ã", '\u00E3'); 1287922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ä", '\u00E4'); 1297922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("å", '\u00E5'); 1307922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("æ", '\u00E6'); 1317922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ç", '\u00E7'); 1327922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("è", '\u00E8'); 1337922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("é", '\u00E9'); 1347922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ê", '\u00EA'); 1357922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ë", '\u00EB'); 1367922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ì", '\u00EC'); 
1377922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("í", '\u00ED'); 1387922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("î", '\u00EE'); 1397922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ï", '\u00EF'); 1407922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ð", '\u00F0'); 1417922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ñ", '\u00F1'); 1427922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ò", '\u00F2'); 1437922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ó", '\u00F3'); 1447922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ô", '\u00F4'); 1457922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("õ", '\u00F5'); 1467922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ö", '\u00F6'); 1477922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("÷", '\u00F7'); 1487922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ø", '\u00F8'); 1497922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ù", '\u00F9'); 1507922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ú", '\u00FA'); 1517922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("û", '\u00FB'); 1527922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ü", '\u00FC'); 1537922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ý", '\u00FD'); 1547922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("þ", '\u00FE'); 1557922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("ÿ", '\u00FF'); 1567922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&fnof", '\u0192'); 1577922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew 
Sapperstein ESCAPE_STRINGS.put("&Alpha", '\u0391'); 1587922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Beta", '\u0392'); 1597922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Gamma", '\u0393'); 1607922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Delta", '\u0394'); 1617922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Epsilon", '\u0395'); 1627922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Zeta", '\u0396'); 1637922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Eta", '\u0397'); 1647922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Theta", '\u0398'); 1657922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Iota", '\u0399'); 1667922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Kappa", '\u039A'); 1677922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Lambda", '\u039B'); 1687922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Mu", '\u039C'); 1697922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Nu", '\u039D'); 1707922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Xi", '\u039E'); 1717922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Omicron", '\u039F'); 1727922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Pi", '\u03A0'); 1737922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Rho", '\u03A1'); 1747922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Sigma", '\u03A3'); 1757922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Tau", '\u03A4'); 1767922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Upsilon", '\u03A5'); 
1777922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Phi", '\u03A6'); 1787922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Chi", '\u03A7'); 1797922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Psi", '\u03A8'); 1807922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Omega", '\u03A9'); 1817922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&alpha", '\u03B1'); 1827922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&beta", '\u03B2'); 1837922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&gamma", '\u03B3'); 1847922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&delta", '\u03B4'); 1857922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&epsilon", '\u03B5'); 1867922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&zeta", '\u03B6'); 1877922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&eta", '\u03B7'); 1887922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&theta", '\u03B8'); 1897922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&iota", '\u03B9'); 1907922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&kappa", '\u03BA'); 1917922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&lambda", '\u03BB'); 1927922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&mu", '\u03BC'); 1937922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&nu", '\u03BD'); 1947922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&xi", '\u03BE'); 1957922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&omicron", '\u03BF'); 1967922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 
ESCAPE_STRINGS.put("&pi", '\u03C0'); 1977922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&rho", '\u03C1'); 1987922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&sigmaf", '\u03C2'); 1997922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&sigma", '\u03C3'); 2007922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&tau", '\u03C4'); 2017922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&upsilon", '\u03C5'); 2027922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&phi", '\u03C6'); 2037922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&chi", '\u03C7'); 2047922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&psi", '\u03C8'); 2057922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&omega", '\u03C9'); 2067922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&thetasym", '\u03D1'); 2077922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&upsih", '\u03D2'); 2087922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&piv", '\u03D6'); 2097922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&bull", '\u2022'); 2107922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&hellip", '\u2026'); 2117922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&prime", '\u2032'); 2127922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Prime", '\u2033'); 2137922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&oline", '\u203E'); 2147922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&frasl", '\u2044'); 2157922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&weierp", '\u2118'); 
2167922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&image", '\u2111'); 2177922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&real", '\u211C'); 2187922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&trade", '\u2122'); 2197922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&alefsym", '\u2135'); 2207922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&larr", '\u2190'); 2217922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&uarr", '\u2191'); 2227922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&rarr", '\u2192'); 2237922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&darr", '\u2193'); 2247922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&harr", '\u2194'); 2257922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&crarr", '\u21B5'); 2267922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&lArr", '\u21D0'); 2277922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&uArr", '\u21D1'); 2287922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&rArr", '\u21D2'); 2297922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&dArr", '\u21D3'); 2307922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&hArr", '\u21D4'); 2317922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&forall", '\u2200'); 2327922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&part", '\u2202'); 2337922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&exist", '\u2203'); 2347922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&empty", '\u2205'); 2357922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 
ESCAPE_STRINGS.put("&nabla", '\u2207'); 2367922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&isin", '\u2208'); 2377922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("¬in", '\u2209'); 2387922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&ni", '\u220B'); 2397922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&prod", '\u220F'); 2407922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&sum", '\u2211'); 2417922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&minus", '\u2212'); 2427922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&lowast", '\u2217'); 2437922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&radic", '\u221A'); 2447922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&prop", '\u221D'); 2457922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&infin", '\u221E'); 2467922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&ang", '\u2220'); 2477922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&and", '\u2227'); 2487922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&or", '\u2228'); 2497922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&cap", '\u2229'); 2507922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&cup", '\u222A'); 2517922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&int", '\u222B'); 2527922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&there4", '\u2234'); 2537922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&sim", '\u223C'); 2547922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&cong", '\u2245'); 
2557922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&asymp", '\u2248'); 2567922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&ne", '\u2260'); 2577922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&equiv", '\u2261'); 2587922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&le", '\u2264'); 2597922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&ge", '\u2265'); 2607922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&sub", '\u2282'); 2617922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&sup", '\u2283'); 2627922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&nsub", '\u2284'); 2637922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&sube", '\u2286'); 2647922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&supe", '\u2287'); 2657922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&oplus", '\u2295'); 2667922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&otimes", '\u2297'); 2677922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&perp", '\u22A5'); 2687922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&sdot", '\u22C5'); 2697922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&lceil", '\u2308'); 2707922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&rceil", '\u2309'); 2717922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&lfloor", '\u230A'); 2727922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&rfloor", '\u230B'); 2737922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&lang", '\u2329'); 2747922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 
ESCAPE_STRINGS.put("&rang", '\u232A'); 2757922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&loz", '\u25CA'); 2767922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&spades", '\u2660'); 2777922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&clubs", '\u2663'); 2787922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&hearts", '\u2665'); 2797922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&diams", '\u2666'); 2807922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put(""", '\u0022'); 2817922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&", '\u0026'); 2827922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("<", '\u003C'); 2837922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put(">", '\u003E'); 2847922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&OElig", '\u0152'); 2857922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&oelig", '\u0153'); 2867922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Scaron", '\u0160'); 2877922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&scaron", '\u0161'); 2887922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Yuml", '\u0178'); 2897922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&circ", '\u02C6'); 2907922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&tilde", '\u02DC'); 2917922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&ensp", '\u2002'); 2927922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&emsp", '\u2003'); 2937922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&thinsp", '\u2009'); 
2947922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&zwnj", '\u200C'); 2957922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&zwj", '\u200D'); 2967922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&lrm", '\u200E'); 2977922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&rlm", '\u200F'); 2987922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&ndash", '\u2013'); 2997922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&mdash", '\u2014'); 3007922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&lsquo", '\u2018'); 3017922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&rsquo", '\u2019'); 3027922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&sbquo", '\u201A'); 3037922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&ldquo", '\u201C'); 3047922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&rdquo", '\u201D'); 3057922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&bdquo", '\u201E'); 3067922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&dagger", '\u2020'); 3077922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&Dagger", '\u2021'); 3087922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&permil", '\u2030'); 3097922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&lsaquo", '\u2039'); 3107922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&rsaquo", '\u203A'); 3117922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ESCAPE_STRINGS.put("&euro", '\u20AC'); 3127922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 3137922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 
/**
 * Code to generate a short 'snippet' from either plain text or html text
 *
 * If the sync protocol can get plain text, that's great, but we'll still strip out extraneous
 * whitespace.  If it's HTML, we'll 1) strip out tags, 2) turn entities into the appropriate
 * characters, and 3) strip out extraneous whitespace, all in one pass.
 *
 * Why not use an existing class?  The best answer is performance; yet another answer is
 * correctness (e.g. Html.textFromHtml simply doesn't generate well-stripped text).  But
 * performance is key; we frequently sync text that is 10K or (much) longer, yet we really only
 * care about a small amount of text for the snippet.  So it's critically important that we just
 * stop when we've gotten enough; existing methods will go through the entire
 * incoming string, at great (and useless, in this case) expense.
3277922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein */ 3287922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 3297922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein public static String makeSnippetFromHtmlText(String text) { 3307922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return makeSnippetFromText(text, true); 3317922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 3327922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 3337922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein public static String makeSnippetFromPlainText(String text) { 3347922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return makeSnippetFromText(text, false); 3357922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 3367922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 3377922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein /** 3387922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * Find the end of this tag; there are two alternatives: <tag .../> or <tag ...> ... 
</tag> 3397922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param htmlText some HTML text 3407922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param tag the HTML tag 3417922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param startPos the start position in the HTML text where the tag starts 3427922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @return the position just before the end of the tag or -1 if not found 3437922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein */ 3447922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein /*package*/ static int findTagEnd(String htmlText, String tag, int startPos) { 3457922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (tag.endsWith(" ")) { 3467922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein tag = tag.substring(0, tag.length() - 1); 3477922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 3487922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int length = htmlText.length(); 3497922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char prevChar = 0; 3507922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein for (int i = startPos; i < length; i++) { 3517922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char c = htmlText.charAt(i); 3527922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (c == '>') { 3537922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (prevChar == '/') { 3547922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return i - 1; 3557922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 3567922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein break; 3577922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 3587922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein prevChar = c; 3597922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 3607922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // We didn't find /> at the end of the tag so find </tag> 
3617922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return htmlText.indexOf("/" + tag, startPos); 3627922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 3637922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 3647922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein public static String makeSnippetFromText(String text, boolean stripHtml) { 3657922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Handle null and empty string 3667922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (TextUtils.isEmpty(text)) return ""; 3677922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 3687922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein final int length = text.length(); 3697922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Use char[] instead of StringBuilder purely for performance; fewer method calls, etc. 3707922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char[] buffer = new char[MAX_SNIPPET_LENGTH]; 3717922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // skipCount is an array of a single int; that int is set inside stripHtmlEntity and is 3727922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // used to determine how many characters can be "skipped" due to the transformation of the 3737922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // entity to a single character. 
When Java allows multiple return values, we can make this 3747922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // much cleaner :-) 3757922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int[] skipCount = new int[1]; 3767922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int bufferCount = 0; 3777922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Start with space as last character to avoid leading whitespace 3787922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char last = ' '; 3797922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Indicates whether we're in the middle of an HTML tag 3807922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein boolean inTag = false; 3817922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 3827922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Walk through the text until we're done with the input OR we've got a large enough snippet 3837922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein for (int i = 0; i < length && bufferCount < MAX_SNIPPET_LENGTH; i++) { 3847922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char c = text.charAt(i); 3857922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (stripHtml && !inTag && (c == '<')) { 3867922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Find tags to strip; they will begin with <! or !- or </ or <letter 3877922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (i < (length - 1)) { 3887922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char peek = text.charAt(i + 1); 3897922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (peek == '!' 
|| peek == '-' || peek == '/' || Character.isLetter(peek)) { 3907922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein inTag = true; 3917922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Strip content of title, script, style and applet tags 3927922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (i < (length - (MAX_STRIP_TAG_LENGTH + 2))) { 3937922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein String tag = text.substring(i + 1, i + MAX_STRIP_TAG_LENGTH + 1); 3947922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein String tagLowerCase = tag.toLowerCase(); 3957922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein boolean stripContent = false; 3967922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein for (String stripTag: STRIP_TAGS) { 3977922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (tagLowerCase.startsWith(stripTag)) { 3987922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein stripContent = true; 3997922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein tag = tag.substring(0, stripTag.length()); 4007922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein break; 4017922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4027922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4037922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (stripContent) { 4047922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Look for the end of this tag 4057922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int endTagPosition = findTagEnd(text, tag, i); 4067922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (endTagPosition < 0) { 4077922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein break; 4087922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else { 4097922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein i = endTagPosition; 4107922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4117922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 
4127922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4137922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4147922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4157922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else if (stripHtml && inTag && (c == '>')) { 4167922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Terminate stripping here 4177922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein inTag = false; 4187922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein continue; 4197922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4207922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 4217922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (inTag) { 4227922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // We just skip by everything while we're in a tag 4237922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein continue; 4247922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else if (stripHtml && (c == '&')) { 4257922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Handle a possible HTML entity here 4267922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // We always get back a character to use; we also get back a "skip count", 4277922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // indicating how many characters were eaten from the entity 4287922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein c = stripHtmlEntity(text, i, skipCount); 4297922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein i += skipCount[0]; 4307922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4317922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 4327922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (Character.isWhitespace(c) || (c == NON_BREAKING_SPACE_CHARACTER)) { 4337922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // The idea is to find the content in the message, not the whitespace, so we'll 
4347922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // turn any combination of contiguous whitespace into a single space 4357922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (last == ' ') { 4367922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein continue; 4377922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else { 4387922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Make every whitespace character a simple space 4397922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein c = ' '; 4407922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4417922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else if ((c == '-' || c == '=') && (last == c)) { 4427922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Lots of messages (especially digests) have whole lines of --- or === 4437922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // We'll get rid of those duplicates here 4447922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein continue; 4457922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4467922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 4477922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // After all that, maybe we've got a character for our snippet 4487922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein buffer[bufferCount++] = c; 4497922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein last = c; 4507922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4517922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 4527922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Lose trailing space and return our snippet 4537922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if ((bufferCount > 0) && (last == ' ')) { 4547922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein bufferCount--; 4557922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4567922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return new 
String(buffer, 0, bufferCount); 4577922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4587922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 4597922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein static /*package*/ char stripHtmlEntity(String text, int pos, int[] skipCount) { 4607922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int length = text.length(); 4617922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Ugly, but we store our skip count in this array; we can't use a static here, because 4627922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // multiple threads might be calling in 4637922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein skipCount[0] = 0; 4647922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // All entities are <= 8 characters long, so that's how far we'll look for one (+ & and ;) 4657922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int end = pos + 10; 4667922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein String entity = null; 4677922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Isolate the entity 4687922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein for (int i = pos; (i < length) && (i < end); i++) { 4697922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (text.charAt(i) == ';') { 4707922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein entity = text.substring(pos, i); 4717922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein break; 4727922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4737922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4747922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (entity == null) { 4757922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // This wasn't really an HTML entity 4767922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return '&'; 4777922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else { 
4787922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Skip count is the length of the entity 4797922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein Character mapping = ESCAPE_STRINGS.get(entity); 4807922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int entityLength = entity.length(); 4817922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (mapping != null) { 4827922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein skipCount[0] = entityLength; 4837922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return mapping; 4847922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else if ((entityLength > 2) && (entity.charAt(1) == '#')) { 4857922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // &#nn; means ascii nn (decimal) and &#xnn means ascii nn (hex) 4867922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char c = '?'; 4877922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein try { 4887922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int i; 4897922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if ((entity.charAt(2) == 'x') && (entityLength > 3)) { 4907922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein i = Integer.parseInt(entity.substring(3), 16); 4917922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else { 4927922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein i = Integer.parseInt(entity.substring(2)); 4937922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4947922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein c = (char)i; 4957922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } catch (NumberFormatException e) { 4967922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // We'll just return the ? 
in this case 4977922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 4987922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein skipCount[0] = entityLength; 4997922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return c; 5007922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5017922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5027922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Worst case, we return the original start character, ampersand 5037922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return '&'; 5047922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5057922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 5067922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein /** 5077922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * Given a string of HTML text and a query containing any number of search terms, returns 5087922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * an HTML string in which those search terms are highlighted (intended for use in a WebView) 5097922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * 5107922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param text the HTML text to process 5117922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param query the search terms 5127922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @return HTML text with the search terms highlighted 5137922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein */ 5147922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein @VisibleForTesting 5157922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein public static String highlightTermsInHtml(String text, String query) { 5167922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein try { 5177922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return highlightTerms(text, query, true).toString(); 5187922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } catch (IOException e) { 
5197922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Can't happen, but we must catch this 5207922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return text; 5217922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5227922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5237922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 5247922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein /** 5257922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * Given a string of plain text and a query containing any number of search terms, returns 5267922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * a CharSequence in which those search terms are highlighted (intended for use in a TextView) 5277922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * 5287922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param text the text to process 5297922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param query the search terms 5307922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @return a CharSequence with the search terms highlighted 5317922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein */ 5327922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein public static CharSequence highlightTermsInText(String text, String query) { 5337922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein try { 5347922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return highlightTerms(text, query, false); 5357922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } catch (IOException e) { 5367922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Can't happen, but we must catch this 5377922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return text; 5387922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5397922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5407922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 5417922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew 
Sapperstein static class SearchTerm { 5427922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein final String mTerm; 5437922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein final String mTermLowerCase; 5447922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein final int mLength; 5457922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int mMatchLength = 0; 5467922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int mMatchStart = -1; 5477922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 5483b965d78774a42358ce6bbdcc43b4c8df130a60eScott Kennedy SearchTerm(String term) { 5497922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein mTerm = term; 5507922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein mTermLowerCase = term.toLowerCase(); 5517922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein mLength = term.length(); 5527922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5537922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5547922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 5557922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein /** 5567922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * Generate a version of the incoming text in which all search terms in a query are highlighted. 
5577922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * If the input is HTML, we return a StringBuilder with additional markup as required 5587922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * If the input is text, we return a SpannableStringBuilder with additional spans as required 5597922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * 5607922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param text the text to be processed 5617922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param query the query, which can contain multiple terms separated by whitespace 5627922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @param html whether or not the text to be processed is HTML 5637922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @return highlighted text 5647922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * 5657922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * @throws IOException as Appendable requires this 5667922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein */ 5677922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein public static CharSequence highlightTerms(String text, String query, boolean html) 5687922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein throws IOException { 5697922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Handle null and empty string 5707922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (TextUtils.isEmpty(text)) return ""; 5717922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein final int length = text.length(); 5727922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 5737922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Break up the query into search terms 5747922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein ArrayList<SearchTerm> terms = new ArrayList<SearchTerm>(); 5757922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (query != null) { 
5767922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein StringTokenizer st = new StringTokenizer(query); 5777922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein while (st.hasMoreTokens()) { 5783b965d78774a42358ce6bbdcc43b4c8df130a60eScott Kennedy terms.add(new SearchTerm(st.nextToken())); 5797922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5807922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 5817922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 5827922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Our appendable depends on whether we're building HTML text (for webview) or spannable 5837922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // text (for UI) 5847922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein final Appendable sb = html ? new StringBuilder() : new SpannableStringBuilder(); 5857922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Indicates whether we're in the middle of an HTML tag 5867922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein boolean inTag = false; 5877922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // The position of the last input character copied to output 5887922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int lastOut = -1; 5897922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 5907922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Walk through the text until we're done with the input 5917922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Just copy any HTML tags directly into the output; search for terms in the remaining text 5927922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein for (int i = 0; i < length; i++) { 5937922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char chr = text.charAt(i); 5947922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (html) { 5957922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (!inTag && (chr == '<')) { 
5967922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Find tags; they will begin with <! or !- or </ or <letter 5977922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (i < (length - 1)) { 5987922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char peek = text.charAt(i + 1); 5997922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (peek == '!' || peek == '-' || peek == '/' || Character.isLetter(peek)) { 6007922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein inTag = true; 6017922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Skip content of title, script, style and applet tags 6027922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (i < (length - (MAX_STRIP_TAG_LENGTH + 2))) { 6037922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein String tag = text.substring(i + 1, i + MAX_STRIP_TAG_LENGTH + 1); 6047922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein String tagLowerCase = tag.toLowerCase(); 6057922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein boolean stripContent = false; 6067922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein for (String stripTag: STRIP_TAGS) { 6077922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (tagLowerCase.startsWith(stripTag)) { 6087922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein stripContent = true; 6097922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein tag = tag.substring(0, stripTag.length()); 6107922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein break; 6117922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6127922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6137922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (stripContent) { 6147922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Look for the end of this tag 6157922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int endTagPosition = findTagEnd(text, tag, i); 6167922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew 
Sapperstein if (endTagPosition < 0) { 6177922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append(text.substring(i)); 6187922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein break; 6197922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else { 6207922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append(text.substring(i, endTagPosition - 1)); 6217922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein i = endTagPosition - 1; 6227922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein chr = text.charAt(i); 6237922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6247922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6257922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6267922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6277922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6287922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else if (inTag && (chr == '>')) { 6297922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein inTag = false; 6307922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6317922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 6327922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (inTag) { 6337922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append(chr); 6347922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein continue; 6357922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6367922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6377922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 6387922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // After all that, we've got some "body" text 6397922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein char chrLowerCase = Character.toLowerCase(chr); 6407922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Whether or not the current character should be appended to the output; we inhibit 
6417922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // this while any search terms match 6427922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein boolean appendNow = true; 6437922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Look through search terms for matches 6447922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein for (SearchTerm t: terms) { 6457922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (chrLowerCase == t.mTermLowerCase.charAt(t.mMatchLength)) { 6467922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (t.mMatchLength++ == 0) { 6477922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // New match start 6487922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein t.mMatchStart = i; 6497922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6507922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (t.mMatchLength == t.mLength) { 6517922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein String matchText = text.substring(t.mMatchStart, t.mMatchStart + t.mLength); 6527922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Completed match; add highlight and reset term 6537922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (t.mMatchStart <= lastOut) { 6547922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein matchText = text.substring(lastOut + 1, i + 1); 6557922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6567922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein /*else*/ 6577922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (matchText.length() == 0) {} else 6587922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (html) { 6597922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append("<span style=\"background-color: " + HIGHLIGHT_COLOR_STRING + 6607922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein "\">"); 6617922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append(matchText); 
6627922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append("</span>"); 6637922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else { 6647922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein SpannableString highlightSpan = new SpannableString(matchText); 6657922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein highlightSpan.setSpan(new BackgroundColorSpan(HIGHLIGHT_COLOR_INT), 0, 6667922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein highlightSpan.length(), Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); 6677922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append(highlightSpan); 6687922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6697922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein lastOut = t.mMatchStart + t.mLength - 1; 6707922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein t.mMatchLength = 0; 6717922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein t.mMatchStart = -1; 6727922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6737922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein appendNow = false; 6747922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else { 6757922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (t.mMatchStart >= 0) { 6767922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // We're no longer matching; check for other matches in progress 6777922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int leastOtherStart = -1; 6787922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein for (SearchTerm ot: terms) { 6797922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Save away the lowest match start for other search terms 6807922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if ((ot != t) && (ot.mMatchStart >= 0) && ((leastOtherStart < 0) || 6817922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein (ot.mMatchStart <= leastOtherStart))) { 6827922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein leastOtherStart = 
ot.mMatchStart; 6837922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6847922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6857922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein int matchEnd = t.mMatchStart + t.mMatchLength; 6867922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (leastOtherStart < 0 || leastOtherStart > matchEnd) { 6877922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Append the whole thing 6887922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (t.mMatchStart > lastOut) { 6897922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append(text.substring(t.mMatchStart, matchEnd)); 6907922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein lastOut = matchEnd; 6917922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 6927922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else if (leastOtherStart == t.mMatchStart) { 6937922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Ok to append the current char 6947922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else if (leastOtherStart < t.mMatchStart) { 6957922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // We're already covered by another search term, so don't append 6967922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein appendNow = false; 6977922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } else if (t.mMatchStart > lastOut) { 6987922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Append the piece of our term that's not already covered 6997922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append(text.substring(t.mMatchStart, leastOtherStart)); 7007922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein lastOut = leastOtherStart; 7017922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 7027922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 7037922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein // Reset this term 
7047922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein t.mMatchLength = 0; 7057922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein t.mMatchStart = -1; 7067922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 7077922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 7087922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 7097922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (appendNow) { 7107922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein sb.append(chr); 7117922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein lastOut = i; 7127922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 7137922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 7147922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 7157922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return (CharSequence)sb; 7167922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 7177922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 7187922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein /** 7197922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * Determine whether two Strings (either of which might be null) are the same; this is true 7207922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein * when both are null or both are Strings that are equal. 
7217922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein */ 7227922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein public static boolean stringOrNullEquals(String a, String b) { 7237922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (a == null && b == null) return true; 7247922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein if (a != null && b != null && a.equals(b)) return true; 7257922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein return false; 7267922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein } 7277922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein 7287922528d9d4b4926f1ed3e1322d14b8e00a03465Andrew Sapperstein} 729