BidiFormatter.java revision 77f6bada6f88acea9025afce3eb0127d45411798
1ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/* 2ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Copyright (C) 2013 The Android Open Source Project 3ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 4ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Licensed under the Apache License, Version 2.0 (the "License"); 5ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * you may not use this file except in compliance with the License. 6ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * You may obtain a copy of the License at 7ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 8ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * http://www.apache.org/licenses/LICENSE-2.0 9ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 10ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Unless required by applicable law or agreed to in writing, software 115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * distributed under the License is distributed on an "AS IS" BASIS, 125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * See the License for the specific language governing permissions and 14ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * limitations under the License. 
15ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 16b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 17b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianpackage android.support.v4.text.bidi; 18b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 19b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianimport android.support.v4.text.TextDirectionHeuristicCompat; 20ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangimport android.support.v4.text.TextDirectionHeuristicsCompat; 21b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianimport android.support.v4.text.TextUtilsCompat; 22b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianimport android.support.v4.view.ViewCompat; 23ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 24ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangimport java.util.Locale; 25b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 26ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangimport static android.support.v4.text.TextDirectionHeuristicsCompat.FIRSTSTRONG_LTR; 27ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 28ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/** 29ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Utility class for formatting text for display in a potentially opposite-directionality context 30ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * without garbling. The directionality of the context is set at formatter creation and the 31ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality of the text can be either estimated or passed in when known. Provides the 32ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * following functionality: 33ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 34ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 1. 
Bidi Wrapping 35ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * When text in one language is mixed into a document in another, opposite-directionality language, 36ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * e.g. when an English business name is embedded in a Hebrew web page, both the inserted string 37ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * and the text surrounding it may be displayed incorrectly unless the inserted string is explicitly 38ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * separated from the surrounding text in a "wrapper" that: 39ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 40ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * - Declares its directionality so that the string is displayed correctly. This can be done in HTML 41ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * markup (e.g. a 'span dir="rtl"' element) by {@link #spanWrap} and similar methods, or - only in 42ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * contexts where markup can't be used - in Unicode bidi formatting codes by {@link #unicodeWrap} 43ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * and similar methods. 44ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 45ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * - Isolates the string's directionality, so it does not unduly affect the surrounding content. 46ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Currently, this can only be done using invisible Unicode characters of the same direction as 47ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * the context (LRM or RLM) in addition to the directionality declaration above, thus "resetting" 48ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * the directionality to that of the context. The "reset" may need to be done at both ends of the 49ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * string. 
Without "reset" after the string, the string will "stick" to a number or logically
 * separate opposite-direction text that happens to follow it in-line (even if separated by
 * neutral content like spaces and punctuation). Without "reset" before the string, the same can
 * happen there, but only with more opposite-direction text, not a number. One approach is to
 * "reset" the direction only after each string, on the theory that if the preceding opposite-
 * direction text is itself bidi-wrapped, the "reset" after it will prevent the sticking. (Doing
 * the "reset" only before each string definitely does not work because we do not want to require
 * bidi-wrapping numbers, and a bidi-wrapped opposite-direction string could be followed by a
 * number.) Still, the safest policy is to do the "reset" on both ends of each string, since RTL
 * message translations often contain untranslated Latin-script brand names and technical terms,
 * and one of these can be followed by a bidi-wrapped inserted value. On the other hand, when one
 * has such a message, it is best to do the "reset" manually in the message translation itself,
 * since the message's opposite-direction text could be followed by an inserted number, which we
 * would not bidi-wrap anyway. Note, however, that the default option flags of this class enable
 * the "reset" on both ends of the string; "reset" only after the string has to be requested
 * explicitly through {@link Builder#stereoReset}.
In an 63ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * alternative to "reset", recent additions to the HTML, CSS, and Unicode standards allow the 64ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * isolation to be part of the directionality declaration. This form of isolation is better than 65ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * "reset" because it takes less space, does not require knowing the context directionality, has a 66ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * gentler effect than "reset", and protects both ends of the string. However, we do not yet allow 67ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * using it because required platforms do not yet support it. 68ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 69ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Providing these wrapping services is the basic purpose of the bidi formatter. 70ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 71ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 2. Directionality estimation 72ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * How does one know whether a string about to be inserted into surrounding text has the same 73ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality? Well, in many cases, one knows that this must be the case when writing the code 74ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * doing the insertion, e.g. when a localized message is inserted into a localized page. In such 75ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * cases there is no need to involve the bidi formatter at all. In some other cases, it need not be 76ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * the same as the context, but is either constant (e.g. urls are always LTR) or otherwise known. 77ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * In the remaining cases, e.g. 
when the string is user-entered or comes from a database, the 78ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * language of the string (and thus its directionality) is not known a priori, and must be 79ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * estimated at run-time. The bidi formatter can do this automatically using the default 80ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * first-strong estimation algorithm. It can also be configured to use a custom directionality 81ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * estimation object. 82ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 83ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 3. Escaping 84ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * When wrapping plain text - i.e. text that is not already HTML or HTML-escaped - in HTML markup, 85ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * the text must first be HTML-escaped to prevent XSS attacks and other nasty business. This of 86ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * course is always true, but the escaping can not be done after the string has already been wrapped 87ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * in markup, so the bidi formatter also serves as a last chance and includes escaping services. 88ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 89ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Thus, in a single call, the formatter will escape the input string as specified, determine its 90ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality, and wrap it as necessary. It is then up to the caller to insert the return value 91ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * in the output. 92ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 93ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangpublic final class BidiFormatter { 94ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 95ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 96ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * The default text direction heuristic. 
97ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 98ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static TextDirectionHeuristicCompat DEFAULT_TEXT_DIRECTION_HEURISTIC = FIRSTSTRONG_LTR; 99ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Unicode "Left-To-Right Embedding" (LRE) character. 102ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final char LRE = '\u202A'; 104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Unicode "Right-To-Left Embedding" (RLE) character. 107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final char RLE = '\u202B'; 109ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Unicode "Pop Directional Formatting" (PDF) character. 112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 113ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final char PDF = '\u202C'; 114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Unicode "Left-To-Right Mark" (LRM) character. 117ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 118ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final char LRM = '\u200E'; 119ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 120ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /* 121ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Unicode "Right-To-Left Mark" (RLM) character. 
122ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 123ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final char RLM = '\u200F'; 124ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 125ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /* 126ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * String representation of LRM 127ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 128ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final String LRM_STRING = Character.toString(LRM); 129ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 130ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /* 131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * String representation of RLM 132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 133ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final String RLM_STRING = Character.toString(RLM); 134ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 135ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 136ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * "ltr" string constant. 137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 138ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final String LTR_STRING = "ltr"; 139ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * "rtl" string constant. 142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 143ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final String RTL_STRING = "rtl"; 144ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 145ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 146ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * "dir=\"ltr\"" string constant. 
147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final String DIR_LTR_STRING = "dir=\"ltr\""; 149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 150ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * "dir=\"rtl\"" string constant. 152ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 153ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final String DIR_RTL_STRING = "dir=\"rtl\""; 154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 156ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * "right" string constant. 157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final String RIGHT = "right"; 159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * "left" string constant. 162ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final String LEFT = "left"; 164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 165ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Empty string constant. 167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 168ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final String EMPTY_STRING = ""; 169ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 171ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * A class for building a BidiFormatter with non-default options. 
172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 173ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public static final class Builder { 174ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private boolean isRtlContext; 175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private int flags; 176ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private TextDirectionHeuristicCompat textDirectionHeuristicCompat; 177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 178ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 179ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Constructor. 180ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 182ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public Builder() { 183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang initialize(isRtlLocale(Locale.getDefault())); 184ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 185ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 187ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Constructor. 188ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 189ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param rtlContext Whether the context directionality is RTL. 190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 191ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public Builder(boolean rtlContext) { 192ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang initialize(rtlContext); 193ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 194ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 195ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Constructor. 197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 198ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param locale The context locale. 
199ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 200ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public Builder(Locale locale) { 201ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang initialize(isRtlLocale(locale)); 202ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 203ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 204ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 205ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Initializes the builder with the given context directionality and default options. 206ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 207ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param isRtlContext Whether the context is RTL or not. 208ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 209ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private void initialize(boolean isRtlContext) { 210ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang this.isRtlContext = isRtlContext; 211ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang textDirectionHeuristicCompat = DEFAULT_TEXT_DIRECTION_HEURISTIC; 212ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang this.flags = DEFAULT_FLAGS; 213ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 214ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 215ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 216ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Specifies whether the BidiFormatter to be built should also "reset" directionality before 217ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * a string being bidi-wrapped, not just after it. The default is false. 
218ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public Builder stereoReset(boolean stereoReset) { 220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (stereoReset) { 221ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang flags |= FLAG_STEREO_RESET; 222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } else { 223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang flags &= ~FLAG_STEREO_RESET; 224ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 225ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return this; 226ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 227ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 228ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 229ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Specifies the default directionality estimation algorithm to be used by the BidiFormatter. 230ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * By default, uses the first-strong heuristic. 231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 232ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param heuristic the {@code TextDirectionHeuristic} to use. 233ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return the builder itself. 234ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 235ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public Builder setTextDirectionHeuristic(TextDirectionHeuristicCompat heuristic) { 236ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang this.textDirectionHeuristicCompat = heuristic; 237ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return this; 238ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 239ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 240ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static BidiFormatter getDefaultInstanceFromContext(boolean isRtlContext) { 241ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return isRtlContext ? 
DEFAULT_RTL_INSTANCE : DEFAULT_LTR_INSTANCE; 242ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 243ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 245ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return A BidiFormatter with the specified options. 246ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public BidiFormatter build() { 248ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (flags == DEFAULT_FLAGS && 249ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang textDirectionHeuristicCompat == DEFAULT_TEXT_DIRECTION_HEURISTIC) { 250ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return getDefaultInstanceFromContext(isRtlContext); 251ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 252ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return new BidiFormatter(isRtlContext, flags, textDirectionHeuristicCompat); 253ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 254ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 255ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 256ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // 2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang private static final int FLAG_STEREO_RESET = 2; 258ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final int DEFAULT_FLAGS = FLAG_STEREO_RESET; 259ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 260ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final BidiFormatter DEFAULT_LTR_INSTANCE = new BidiFormatter( 261ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang false /* LTR context */, 262ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang DEFAULT_FLAGS, 263ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang DEFAULT_TEXT_DIRECTION_HEURISTIC); 264ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 265ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final BidiFormatter DEFAULT_RTL_INSTANCE = new BidiFormatter( 266ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang true /* RTL context */, 
267ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang DEFAULT_FLAGS, 268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang DEFAULT_TEXT_DIRECTION_HEURISTIC); 269ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 270ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private final boolean isRtlContext; 271ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private final int flags; 272ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private final TextDirectionHeuristicCompat defaultTextDirectionHeuristicCompat; 273ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 274ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 275ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Factory for creating an instance of BidiFormatter given the context directionality. 276ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 277ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param rtlContext Whether the context directionality is RTL. 278ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 279ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public static BidiFormatter getInstance(boolean rtlContext) { 280ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return new Builder(rtlContext).build(); 281ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 282ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 283ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 284ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Factory for creating an instance of BidiFormatter given the context locale. 285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 286ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param locale The context locale. 
287ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 288ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public static BidiFormatter getInstance(Locale locale) { 289ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return new Builder(locale).build(); 290ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 291ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 292ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 293ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param isRtlContext Whether the context directionality is RTL or not. 294ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param flags The option flags. 295ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param heuristic The default text direction heuristic. 296ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 297ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private BidiFormatter(boolean isRtlContext, int flags, TextDirectionHeuristicCompat heuristic) { 298ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang this.isRtlContext = isRtlContext; 299ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang this.flags = flags; 300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang this.defaultTextDirectionHeuristicCompat = heuristic; 301ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 302ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 303ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 304ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return Whether the context directionality is RTL 305ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 306ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public boolean isRtlContext() { 307ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return isRtlContext; 308ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 309ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 310ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 311ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return Whether directionality "reset" should also be done before a string being 312ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * bidi-wrapped, not just 
after it. 313ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 314ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public boolean getStereoReset() { 315ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (flags & FLAG_STEREO_RESET) != 0; 316ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 317ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 318ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 319ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" if it is LTR. 320ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 321ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str String whose directionality is to be estimated. 322ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" otherwise. 323ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 324ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String dirAttrValue(String str) { 325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return dirAttrValue(isRtl(str)); 326ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 327ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 328ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 329ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Operates like {@link #dirAttrValue(String)}, but uses a given heuristic to estimate the 330ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * {@code str}'s directionality. 331ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 332ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str String whose directionality is to be estimated. 333ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s 334ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality. 335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" otherwise. 
336ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 337ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String dirAttrValue(String str, TextDirectionHeuristicCompat heuristic) { 338ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return dirAttrValue(heuristic.isRtl(str, 0, str.length())); 339ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 340ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 341ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 342ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns "rtl" if the given directionality is RTL, and "ltr" if it is LTR. 343ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 344ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param isRtl Whether the directionality is RTL or not. 345ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return "rtl" if the given directionality is RTL, and "ltr" otherwise. 346ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 347ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String dirAttrValue(boolean isRtl) { 348ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return isRtl ? RTL_STRING : LTR_STRING; 349ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 350ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 351ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 352ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns "dir=\"ltr\"" or "dir=\"rtl\"", depending on {@code str}'s estimated directionality, 353ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * if it is not the same as the context directionality. Otherwise, returns the empty string. 354ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 355ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str String whose directionality is to be estimated. 356ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR 357ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * context; else, the empty string. 
358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 359ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String dirAttr(String str) { 360ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return dirAttr(isRtl(str)); 361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 362ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 363ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 364ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Operates like {@link #dirAttr(String)}, but uses a given heuristic to estimate the 365ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * {@code str}'s directionality. 366ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 367ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str String whose directionality is to be estimated. 368ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s 369ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality. 370ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR 371ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * context; else, the empty string. 372ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 373ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String dirAttr(String str, TextDirectionHeuristicCompat heuristic) { 374b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian return dirAttr(heuristic.isRtl(str, 0, str.length())); 375ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 376ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 377ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 378ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns "dir=\"ltr\"" or "dir=\"rtl\"", depending on the given directionality, if it is not 379ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * the same as the context directionality. Otherwise, returns the empty string. 
380ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 381ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param isRtl Whether the directionality is RTL or not 382ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR 383ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * context; else, the empty string. 384ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 385ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String dirAttr(boolean isRtl) { 386ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (isRtl != isRtlContext) ? (isRtl ? DIR_RTL_STRING : DIR_LTR_STRING) : EMPTY_STRING; 387ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 388ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 389ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 390ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the 391ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * overall or the exit directionality of a given string is opposite to the context directionality. 392ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Putting this after the string (including its directionality declaration wrapping) prevents it 393ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * from "sticking" to other opposite-directionality text or a number appearing after it inline 394ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * with only neutral content in between. Otherwise returns the empty string. While the exit 395ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality is determined by scanning the end of the string, the overall directionality is 396ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * given explicitly in {@code dir}. 397ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 398ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str String after which the mark may need to appear. 
399b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; 400ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * else, the empty string. 401ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 402ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String markAfter(String str) { 403ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return markAfter(str, defaultTextDirectionHeuristicCompat); 404ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 405ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 406ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 407ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Operates like {@link #markAfter(String)}, but uses a given heuristic to estimate the 408ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * {@code str}'s directionality. 409ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 41091037db265ecdd914a26e056cf69207b4f50924ehkuang * @param str String after which the mark may need to appear. 411ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s 412ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality. 413ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; 414ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * else, the empty string. 4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 416ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String markAfter(String str, TextDirectionHeuristicCompat heuristic) { 417ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang final boolean isRtl = heuristic.isRtl(str, 0, str.length()); 418ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // getExitDir() is called only if needed (short-circuit). 
419ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (!isRtlContext && (isRtl || getExitDir(str) == DIR_RTL)) { 420ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return LRM_STRING; 421ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 422ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (isRtlContext && (!isRtl || getExitDir(str) == DIR_LTR)) { 423ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return RLM_STRING; 424ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 425ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return EMPTY_STRING; 426ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 427ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 428ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 429ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the 430ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * overall or the entry directionality of a given string is opposite to the context 431ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality. Putting this before the string (including its directionality declaration 432ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * wrapping) prevents it from "sticking" to other opposite-directionality text appearing before it 433ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * inline with only neutral content in between. Otherwise returns the empty string. While the 434ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * entry directionality is determined by scanning the beginning of the string, the overall 435ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality is given explicitly in {@code dir}. 436ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 437ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str String before which the mark may need to appear. 
438ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; 439ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * else, the empty string. 440ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 441ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String markBefore(String str) { 442ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return markBefore(str, defaultTextDirectionHeuristicCompat); 443ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 444ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 445ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 446ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Operates like {@link #markBefore(String)}, but uses a given heuristic to estimate the 447ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * {@code str}'s directionality. 448ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 449ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str String before which the mark may need to appear. 450ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s 451ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality. 452ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; 453ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * else, the empty string. 454ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 455ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String markBefore(String str, TextDirectionHeuristicCompat heuristic) { 456ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang final boolean isRtl = heuristic.isRtl(str, 0, str.length()); 457ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // getEntryDir() is called only if needed (short-circuit). 
458ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (!isRtlContext && (isRtl || getEntryDir(str) == DIR_RTL)) { 459ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return LRM_STRING; 460ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 461ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (isRtlContext && (!isRtl || getEntryDir(str) == DIR_LTR)) { 462ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return RLM_STRING; 463ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 464ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return EMPTY_STRING; 465ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 466ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 467ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 468ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns the Unicode bidi mark matching the context directionality (LRM for LTR context 469ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality, RLM for RTL context directionality). 470ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 471ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String mark() { 472ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return isRtlContext ? RLM_STRING : LRM_STRING; 473ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 474ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 475ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 476ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns "right" for RTL context directionality. Otherwise for LTR context directionality 477ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * returns "left". 478ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 479ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String startEdge() { 480ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return isRtlContext ? RIGHT : LEFT; 481ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 482ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 483ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 484ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns "left" for RTL context directionality. 
Otherwise for LTR context directionality 485ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * returns "right". 486ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 487ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String endEdge() { 488ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return isRtlContext ? LEFT : RIGHT; 489ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 491ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 492ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Estimates the directionality of a string using the default text direction heuristic. 493ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 494ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str String whose directionality is to be estimated. 495ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return true if {@code str}'s estimated overall directionality is RTL. Otherwise returns 496ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * false. 497ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 498ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public boolean isRtl(String str) { 499ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return defaultTextDirectionHeuristicCompat.isRtl(str, 0, str.length()); 500ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 501ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 502ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 503ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Formats a given string of unknown directionality for use in HTML output of the context 504ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality, so an opposite-directionality string is neither garbled nor garbles its 505ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * surroundings. 506ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 507ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * The algorithm: estimates the directionality of the given string using the given heuristic. 
508ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * If the directionality is known, pass TextDirectionHeuristics.LTR or RTL for heuristic. 509ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * In case its directionality doesn't match the context directionality, wraps it with a 'span' 510ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * element and adds a "dir" attribute (either 'dir=\"rtl\"' or 'dir=\"ltr\"'). 511ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 512ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * If {@code isolate}, directionally isolates the string so that it does not garble its 513ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * surroundings. Currently, this is done by "resetting" the directionality after the string by 514ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when 515ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * either the overall directionality or the exit directionality of the string is opposite to that 516ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * of the context. If the formatter was built using {@link Builder#stereoReset(boolean)} and 517ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * passing "true" as an argument, also prepends a Unicode bidi mark matching the context 518ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality when either the overall directionality or the entry directionality of the 519ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * string is opposite to that of the context. 520ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 521ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 522ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str The input string. 523ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param heuristic The algorithm to be used to estimate the string's overall direction. 
524ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param isolate Whether to directionally isolate the string to prevent it from garbling the 525ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * content around it. 526ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return Input string after applying the above processing. 527ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 528ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String spanWrap(String str, TextDirectionHeuristicCompat heuristic, boolean isolate) { 529ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang final boolean isRtl = heuristic.isRtl(str, 0, str.length()); 530ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang String origStr = str; 531ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang str = TextUtilsCompat.htmlEncode(str); 532ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 533ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang StringBuilder result = new StringBuilder(); 534ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (getStereoReset() && isolate) { 535ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang result.append(markBefore(origStr, 536ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang isRtl ? TextDirectionHeuristicsCompat.RTL : TextDirectionHeuristicsCompat.LTR)); 537ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 538ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (isRtl != isRtlContext) { 539ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang result.append("<span ").append(dirAttr(isRtl)).append('>').append(str).append("</span>"); 540ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } else { 541ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang result.append(str); 542ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 543ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (isolate) { 544ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang result.append(markAfter(origStr, 545ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang isRtl ? 
TextDirectionHeuristicsCompat.RTL : TextDirectionHeuristicsCompat.LTR)); 546ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 547ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return result.toString(); 548ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 549ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 550ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 551ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Operates like {@link #spanWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but assumes 552ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * {@code isolate} is true. 553ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 554ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str The input string. 555ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param heuristic The algorithm to be used to estimate the string's overall direction. 556ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return Input string after applying the above processing. 557ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 558ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String spanWrap(String str, TextDirectionHeuristicCompat heuristic) { 559ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return spanWrap(str, heuristic, true /* isolate */); 560ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 561ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 562ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 563ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Operates like {@link #spanWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but uses the 564ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * formatter's default direction estimation algorithm. 565ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 566ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str The input string. 
567ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param isolate Whether to directionally isolate the string to prevent it from garbling the 568ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * content around it 569ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return Input string after applying the above processing. 570ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 571ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String spanWrap(String str, boolean isolate) { 572ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return spanWrap(str, defaultTextDirectionHeuristicCompat, isolate); 573ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 574ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 575ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 576ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Operates like {@link #spanWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but uses the 577ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * formatter's default direction estimation algorithm and assumes {@code isolate} is true. 578ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 579ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str The input string. 580ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return Input string after applying the above processing. 
581ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 582ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String spanWrap(String str) { 583ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return spanWrap(str, defaultTextDirectionHeuristicCompat, true /* isolate */); 584ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 585ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 586ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 587ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Formats a string of given directionality for use in plain-text output of the context 588ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality, so an opposite-directionality string is neither garbled nor garbles its 589ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * surroundings. As opposed to {@link #spanWrap}, this makes use of Unicode bidi 590ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * formatting characters. In HTML, its *only* valid use is inside of elements that do not allow 591ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * markup, e.g. the 'option' and 'title' elements. 592ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 593ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * The algorithm: In case the given directionality doesn't match the context directionality, wraps 594ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * the string with Unicode bidi formatting characters: RLE+{@code str}+PDF for RTL text, or 595ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * LRE+{@code str}+PDF for LTR text. 596ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 597ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * If {@code isolate}, directionally isolates the string so that it does not garble its 598ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * surroundings. 
Currently, this is done by "resetting" the directionality after the string by 599ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when 600ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * either the overall directionality or the exit directionality of the string is opposite to that 601ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * of the context. If the formatter was built using {@link Builder#stereoReset(boolean)} and 602ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * passing "true" as an argument, also prepends a Unicode bidi mark matching the context 603ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality when either the overall directionality or the entry directionality of the 604ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * string is opposite to that of the context. Note that as opposed to the overall 605ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality, the entry and exit directionalities are determined from the string itself. 606ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p> 607ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Does *not* do HTML-escaping. 6081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang * 6091184aebb761cbeac9124c37189a80a1a58f04b6bhkuang * @param str The input string. 610b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * @param heuristic The algorithm to be used to estimate the string's overall direction. 611b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * @param isolate Whether to directionally isolate the string to prevent it from garbling the 612b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * content around it 613b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * @return Input string after applying the above processing. 
614ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 6151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang public String unicodeWrap(String str, TextDirectionHeuristicCompat heuristic, boolean isolate) { 616ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang final boolean isRtl = heuristic.isRtl(str, 0, str.length()); 617ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang StringBuilder result = new StringBuilder(); 618ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (getStereoReset() && isolate) { 6191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang result.append(markBefore(str, 620ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang isRtl ? TextDirectionHeuristicsCompat.RTL : TextDirectionHeuristicsCompat.LTR)); 621ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 622ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (isRtl != isRtlContext) { 623b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian result.append(isRtl ? RLE : LRE); 624ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang result.append(str); 625ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang result.append(PDF); 626b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian } else { 627b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian result.append(str); 628ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 629b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian if (isolate) { 630ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang result.append(markAfter(str, 631b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian isRtl ? 
TextDirectionHeuristicsCompat.RTL : TextDirectionHeuristicsCompat.LTR)); 632ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 633ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return result.toString(); 634ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 635b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 636b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian /** 637b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * Operates like {@link #unicodeWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but assumes 638b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * {@code isolate} is true. 639ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 640ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str The input string. 641b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * @param heuristic The algorithm to be used to estimate the string's overall direction. 642b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * @return Input string after applying the above processing. 643b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian */ 644ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String unicodeWrap(String str, TextDirectionHeuristicCompat heuristic) { 645ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return unicodeWrap(str, heuristic, true /* isolate */); 646ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 647ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 648ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 6495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Operates like {@link #unicodeWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but uses the 6505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * formatter's default direction estimation algorithm. 6515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * 6525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * @param str The input string. 
6535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * @param isolate Whether to directionally isolate the string to prevent it from garbling the 6545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * content around it 6555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * @return Input string after applying the above processing. 6565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 657b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian public String unicodeWrap(String str, boolean isolate) { 658ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return unicodeWrap(str, defaultTextDirectionHeuristicCompat, isolate); 659ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 660ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 661ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 662ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Operates like {@link #unicodeWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but uses the 663b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * formatter's default direction estimation algorithm and assumes {@code isolate} is true. 664b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * 665ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str The input string. 6661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang * @return Input string after applying the above processing. 667ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 668ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang public String unicodeWrap(String str) { 669ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return unicodeWrap(str, defaultTextDirectionHeuristicCompat, true /* isolate */); 670ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 671ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 672ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 673ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Helper method to return true if the Locale directionality is RTL. 
674ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 6755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * @param locale The Locale whose directionality will be checked to be RTL or LTR 676ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @return true if the {@code locale} directionality is RTL. False otherwise. 677ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 678ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static boolean isRtlLocale(Locale locale) { 679ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return (TextUtilsCompat.getLayoutDirectionFromLocale(locale) == ViewCompat.LAYOUT_DIRECTION_RTL); 680ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 681ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 682ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 683ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Enum for directionality type. 684ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 685ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final int DIR_LTR = -1; 686ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final int DIR_UNKNOWN = 0; 687ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final int DIR_RTL = +1; 688ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 689ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 690b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * Returns the directionality of the last character with strong directionality in the string, or 691b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * DIR_UNKNOWN if none was encountered. For efficiency, actually scans backwards from the end of 692ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * the string. Treats a non-BN character between an LRE/RLE/LRO/RLO and its matching PDF as a 6931184aebb761cbeac9124c37189a80a1a58f04b6bhkuang * strong character, LTR after LRE/LRO, and RTL after RLE/RLO. 
The results are undefined for a 694ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * string containing unbalanced LRE/RLE/LRO/RLO/PDF characters. The intended use is to check 695ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * whether a logically separate item that starts with a number or a character of the string's 696ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * exit directionality and follows this string inline (not counting any neutral characters in 697ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * between) would "stick" to it in an opposite-directionality context, thus being displayed in 698ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * an incorrect position. An LRM or RLM character (the one of the context's directionality) 699ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * between the two will prevent such sticking. 700ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 701ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str the string to check. 702ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 703b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian private static int getExitDir(String str) { 704ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return new DirectionalityEstimator(str, false /* isHtml */).getExitDir(); 7051184aebb761cbeac9124c37189a80a1a58f04b6bhkuang } 7065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 707ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 7085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Returns the directionality of the first character with strong directionality in the string, 709ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * or DIR_UNKNOWN if none was encountered. Treats a non-BN character between an 710ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * LRE/RLE/LRO/RLO and its matching PDF as a strong character, LTR after LRE/LRO, and RTL after 711ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * RLE/RLO. 
The results are undefined for a string containing unbalanced LRE/RLE/LRO/RLO/PDF 712ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * characters. The intended use is to check whether a logically separate item that ends with a 713ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * character of the string's entry directionality and precedes the string inline (not counting 714ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * any neutral characters in between) would "stick" to it in an opposite-directionality context, 715ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * thus being displayed in an incorrect position. An LRM or RLM character (the one of the 716ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * context's directionality) between the two will prevent such sticking. 717ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 718ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param str the string to check. 719ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 720ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static int getEntryDir(String str) { 721ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return new DirectionalityEstimator(str, false /* isHtml */).getEntryDir(); 722ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 72391037db265ecdd914a26e056cf69207b4f50924ehkuang 7241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang /** 725b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * An object that estimates the directionality of a given string by various methods. 726ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 727ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 728ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static class DirectionalityEstimator { 729ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 730b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian // Internal static variables and constants. 
731b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 732b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian /** 733b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian * Size of the bidi character class cache. The results of the Character.getDirectionality() 7341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang * calls on the lowest DIR_TYPE_CACHE_SIZE codepoints are kept in an array for speed. 735ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * The 0x700 value is designed to leave all the European and Near Eastern languages in the 736ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * cache. It can be reduced to 0x180, restricting the cache to the Western European 737ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * languages. 738ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 739ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final int DIR_TYPE_CACHE_SIZE = 0x700; 740ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 741ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 742ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * The bidi character class cache. 743ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 744ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static final byte DIR_TYPE_CACHE[]; 745ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 746ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang static { 747ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang DIR_TYPE_CACHE = new byte[DIR_TYPE_CACHE_SIZE]; 748ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (int i = 0; i < DIR_TYPE_CACHE_SIZE; i++) { 749ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang DIR_TYPE_CACHE[i] = Character.getDirectionality(i); 750ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 751ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 752ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 753ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Internal instance variables. 
754ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 755ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 756ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * The text to be scanned. 757ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 758ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private final String text; 759ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 760ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 761ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Whether the text to be scanned is to be treated as HTML, i.e. skipping over tags and 762ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * entities when looking for the next / preceding dir type. 763ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 764ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private final boolean isHtml; 765ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 766ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 767ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * The length of the text in chars. 768ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 769ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private final int length; 770ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 771ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 772ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * The current position in the text. 773ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 774ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private int charIndex; 775ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 776ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 777ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * The char encountered by the last dirTypeForward or dirTypeBackward call. If it 778ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * encountered a supplementary codepoint, this contains a char that is not a valid 779ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * codepoint. This is ok, because this member is only used to detect some well-known ASCII 780ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * syntax, e.g. 
"http://" and the beginning of an HTML tag or entity. 781ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 782ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private char lastChar; 783ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 784ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 785ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Constructor. 786ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 787ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param text The string to scan. 788ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @param isHtml Whether the text to be scanned is to be treated as HTML, i.e. skipping over 789ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * tags and entities. 790ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 791ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang DirectionalityEstimator(String text, boolean isHtml) { 792ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang this.text = text; 793ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang this.isHtml = isHtml; 794ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang length = text.length(); 795ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 796ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 797ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 798ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns the directionality of the first character with strong directionality in the 799ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * string, or DIR_UNKNOWN if none was encountered. Treats a non-BN character between an 800ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * LRE/RLE/LRO/RLO and its matching PDF as a strong character, LTR after LRE/LRO, and RTL 801ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * after RLE/RLO. The results are undefined for a string containing unbalanced 802ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * LRE/RLE/LRO/RLO/PDF characters. 
803ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 804ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int getEntryDir() { 805ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // The reason for this method name, as opposed to getFirstStrongDir(), is that 806ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // "first strong" is a commonly used description of Unicode's estimation algorithm, 807ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // but the two must treat formatting characters quite differently. Thus, we are staying 808ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // away from both "first" and "last" in these method names to avoid confusion. 809ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang charIndex = 0; 810ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int embeddingLevel = 0; 811ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int embeddingLevelDir = DIR_UNKNOWN; 812ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int firstNonEmptyEmbeddingLevel = 0; 813ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang while (charIndex < length && firstNonEmptyEmbeddingLevel == 0) { 814ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang switch (dirTypeForward()) { 815ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING: 816ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE: 817ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ++embeddingLevel; 818ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang embeddingLevelDir = DIR_LTR; 819ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 820ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING: 821ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE: 822ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ++embeddingLevel; 823ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang embeddingLevelDir = DIR_RTL; 824ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 
825ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT: 826ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang --embeddingLevel; 827ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // To restore embeddingLevelDir to its previous value, we would need a 828ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stack, which we want to avoid. Thus, at this point we do not know the 829ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // current embedding's directionality. 830ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang embeddingLevelDir = DIR_UNKNOWN; 831ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 832ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL: 833ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 834ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_LEFT_TO_RIGHT: 835ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (embeddingLevel == 0) { 836ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return DIR_LTR; 837ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 838ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang firstNonEmptyEmbeddingLevel = embeddingLevel; 839ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 840ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_RIGHT_TO_LEFT: 841ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC: 842ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (embeddingLevel == 0) { 8431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang return DIR_RTL; 844ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 845ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang firstNonEmptyEmbeddingLevel = embeddingLevel; 846ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 847ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang default: 848ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang firstNonEmptyEmbeddingLevel = embeddingLevel; 849ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 
850ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 851ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 852ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 853ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // We have either found a non-empty embedding or scanned the entire string finding 854ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // neither a non-empty embedding nor a strong character outside of an embedding. 855ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (firstNonEmptyEmbeddingLevel == 0) { 856ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // We have not found a non-empty embedding. Thus, the string contains neither a 8575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // non-empty embedding nor a strong character outside of an embedding. 858ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return DIR_UNKNOWN; 8595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 860ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 861ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // We have found a non-empty embedding. 862ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (embeddingLevelDir != DIR_UNKNOWN) { 863ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // We know the directionality of the non-empty embedding. 864ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return embeddingLevelDir; 865ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 866ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 867ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // We do not remember the directionality of the non-empty embedding we found. So, we go 868ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // backwards to find the start of the non-empty embedding and get its directionality. 
869ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang while (charIndex > 0) { 870ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang switch (dirTypeBackward()) { 871ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING: 872b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE: 873b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian if (firstNonEmptyEmbeddingLevel == embeddingLevel) { 874b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian return DIR_LTR; 875b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian } 876b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian --embeddingLevel; 877b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian break; 8781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING: 879ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE: 880ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (firstNonEmptyEmbeddingLevel == embeddingLevel) { 881ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return DIR_RTL; 882ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 883ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang --embeddingLevel; 884ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 885ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT: 886ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ++embeddingLevel; 887ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 888ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 889ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 890ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // We should never get here. 
891ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return DIR_UNKNOWN; 892ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 893ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 894ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 895ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns the directionality of the last character with strong directionality in the 896ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * string, or DIR_UNKNOWN if none was encountered. For efficiency, actually scans backwards 897ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * from the end of the string. Treats a non-BN character between an LRE/RLE/LRO/RLO and its 898ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * matching PDF as a strong character, LTR after LRE/LRO, and RTL after RLE/RLO. The results 89991037db265ecdd914a26e056cf69207b4f50924ehkuang * are undefined for a string containing unbalanced LRE/RLE/LRO/RLO/PDF characters. 90091037db265ecdd914a26e056cf69207b4f50924ehkuang */ 90191037db265ecdd914a26e056cf69207b4f50924ehkuang int getExitDir() { 902ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // The reason for this method name, as opposed to getLastStrongDir(), is that "last 903ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // strong" sounds like the exact opposite of "first strong", which is a commonly used 904ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // description of Unicode's estimation algorithm (getUnicodeDir() above), but the two 905ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // must treat formatting characters quite differently. Thus, we are staying away from 906ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // both "first" and "last" in these method names to avoid confusion. 
907ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang charIndex = length; 908ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int embeddingLevel = 0; 909ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int lastNonEmptyEmbeddingLevel = 0; 910ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang while (charIndex > 0) { 911ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang switch (dirTypeBackward()) { 912ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_LEFT_TO_RIGHT: 913ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (embeddingLevel == 0) { 914ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return DIR_LTR; 915ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 916ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (lastNonEmptyEmbeddingLevel == 0) { 917ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang lastNonEmptyEmbeddingLevel = embeddingLevel; 918ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 919ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 920ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING: 921ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE: 922ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (lastNonEmptyEmbeddingLevel == embeddingLevel) { 923ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return DIR_LTR; 924ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 925ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang --embeddingLevel; 926ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 927ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_RIGHT_TO_LEFT: 928ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC: 929ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (embeddingLevel == 0) { 930ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return DIR_RTL; 931ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 932ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (lastNonEmptyEmbeddingLevel == 0) 
{ 933ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang lastNonEmptyEmbeddingLevel = embeddingLevel; 934ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 935ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 936b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING: 937b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE: 938b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian if (lastNonEmptyEmbeddingLevel == embeddingLevel) { 939b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian return DIR_RTL; 940b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian } 941b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian --embeddingLevel; 9421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang break; 943ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT: 944ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ++embeddingLevel; 945ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 946ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL: 947ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 948ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang default: 949ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (lastNonEmptyEmbeddingLevel == 0) { 950ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang lastNonEmptyEmbeddingLevel = embeddingLevel; 951ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 952ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang break; 953ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 954ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 955ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return DIR_UNKNOWN; 956ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 957ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 958ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Internal methods 
959ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 960ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 961ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Gets the bidi character class, i.e. Character.getDirectionality(), of a given char, using 962ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * a cache for speed. Not designed for supplementary codepoints, whose results we do not 963ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * cache. 964ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 965ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang private static byte getCachedDirectionality(char c) { 966ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return c < DIR_TYPE_CACHE_SIZE ? DIR_TYPE_CACHE[c] : Character.getDirectionality(c); 967ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 968b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 969ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /** 970ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Returns the Character.DIRECTIONALITY_... value of the next codepoint and advances 9711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang * charIndex. If isHtml, and the codepoint is '<' or '&', advances through the tag/entity, 9721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang * and returns Character.DIRECTIONALITY_WHITESPACE. For an entity, it would be best to 9731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang * figure out the actual character, and return its dirtype, but treating it as whitespace is 9741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang * good enough for our purposes. 975ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 976ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * @throws java.lang.IndexOutOfBoundsException if called when charIndex >= length or < 0. 
977ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 978 byte dirTypeForward() { 979 lastChar = text.charAt(charIndex); 980 if (Character.isHighSurrogate(lastChar)) { 981 int codePoint = Character.codePointAt(text, charIndex); 982 charIndex += Character.charCount(codePoint); 983 return Character.getDirectionality(codePoint); 984 } 985 charIndex++; 986 byte dirType = getCachedDirectionality(lastChar); 987 if (isHtml) { 988 // Process tags and entities. 989 if (lastChar == '<') { 990 dirType = skipTagForward(); 991 } else if (lastChar == '&') { 992 dirType = skipEntityForward(); 993 } 994 } 995 return dirType; 996 } 997 998 /** 999 * Returns the Character.DIRECTIONALITY_... value of the preceding codepoint and advances 1000 * charIndex backwards. If isHtml, and the codepoint is the end of a complete HTML tag or 1001 * entity, advances over the whole tag/entity and returns 1002 * Character.DIRECTIONALITY_WHITESPACE. For an entity, it would be best to figure out the 1003 * actual character, and return its dirtype, but treating it as whitespace is good enough 1004 * for our purposes. 1005 * 1006 * @throws java.lang.IndexOutOfBoundsException if called when charIndex > length or <= 0. 1007 */ 1008 byte dirTypeBackward() { 1009 lastChar = text.charAt(charIndex - 1); 1010 if (Character.isLowSurrogate(lastChar)) { 1011 int codePoint = Character.codePointBefore(text, charIndex); 1012 charIndex -= Character.charCount(codePoint); 1013 return Character.getDirectionality(codePoint); 1014 } 1015 charIndex--; 1016 byte dirType = getCachedDirectionality(lastChar); 1017 if (isHtml) { 1018 // Process tags and entities. 1019 if (lastChar == '>') { 1020 dirType = skipTagBackward(); 1021 } else if (lastChar == ';') { 1022 dirType = skipEntityBackward(); 1023 } 1024 } 1025 return dirType; 1026 } 1027 1028 /** 1029 * Advances charIndex forward through an HTML tag (after the opening < has already been 1030 * read) and returns Character.DIRECTIONALITY_WHITESPACE. 
If there is no matching >, 1031 * does not change charIndex and returns Character.DIRECTIONALITY_OTHER_NEUTRALS (for the 1032 * < that hadn't been part of a tag after all). 1033 */ 1034 private byte skipTagForward() { 1035 int initialCharIndex = charIndex; 1036 while (charIndex < length) { 1037 lastChar = text.charAt(charIndex++); 1038 if (lastChar == '>') { 1039 // The end of the tag. 1040 return Character.DIRECTIONALITY_WHITESPACE; 1041 } 1042 if (lastChar == '"' || lastChar == '\'') { 1043 // Skip over a quoted attribute value inside the tag. 1044 char quote = lastChar; 1045 while (charIndex < length && (lastChar = text.charAt(charIndex++)) != quote) {} 1046 } 1047 } 1048 // The original '<' wasn't the start of a tag after all. 1049 charIndex = initialCharIndex; 1050 lastChar = '<'; 1051 return Character.DIRECTIONALITY_OTHER_NEUTRALS; 1052 } 1053 1054 /** 1055 * Advances charIndex backward through an HTML tag (after the closing > has already been 1056 * read) and returns Character.DIRECTIONALITY_WHITESPACE. If there is no matching <, does 1057 * not change charIndex and returns Character.DIRECTIONALITY_OTHER_NEUTRALS (for the > 1058 * that hadn't been part of a tag after all). Nevertheless, the running time for calling 1059 * skipTagBackward() in a loop remains linear in the size of the text, even for a text like 1060 * ">>>>", because skipTagBackward() also stops looking for a matching < 1061 * when it encounters another >. 1062 */ 1063 private byte skipTagBackward() { 1064 int initialCharIndex = charIndex; 1065 while (charIndex > 0) { 1066 lastChar = text.charAt(--charIndex); 1067 if (lastChar == '<') { 1068 // The start of the tag. 1069 return Character.DIRECTIONALITY_WHITESPACE; 1070 } 1071 if (lastChar == '>') { 1072 break; 1073 } 1074 if (lastChar == '"' || lastChar == '\'') { 1075 // Skip over a quoted attribute value inside the tag. 
1076 char quote = lastChar; 1077 while (charIndex > 0 && (lastChar = text.charAt(--charIndex)) != quote) {} 1078 } 1079 } 1080 // The original '>' wasn't the end of a tag after all. 1081 charIndex = initialCharIndex; 1082 lastChar = '>'; 1083 return Character.DIRECTIONALITY_OTHER_NEUTRALS; 1084 } 1085 1086 /** 1087 * Advances charIndex forward through an HTML character entity tag (after the opening 1088 * & has already been read) and returns Character.DIRECTIONALITY_WHITESPACE. It would be 1089 * best to figure out the actual character and return its dirtype, but this is good enough. 1090 */ 1091 private byte skipEntityForward() { 1092 while (charIndex < length && (lastChar = text.charAt(charIndex++)) != ';') {} 1093 return Character.DIRECTIONALITY_WHITESPACE; 1094 } 1095 1096 /** 1097 * Advances charIndex backward through an HTML character entity tag (after the closing ; 1098 * has already been read) and returns Character.DIRECTIONALITY_WHITESPACE. It would be best 1099 * to figure out the actual character and return its dirtype, but this is good enough. 1100 * If there is no matching &, does not change charIndex and returns 1101 * Character.DIRECTIONALITY_OTHER_NEUTRALS (for the ';' that did not start an entity after 1102 * all). Nevertheless, the running time for calling skipEntityBackward() in a loop remains 1103 * linear in the size of the text, even for a text like ";;;;;;;", because skipTagBackward() 1104 * also stops looking for a matching & when it encounters another ;. 1105 */ 1106 private byte skipEntityBackward() { 1107 int initialCharIndex = charIndex; 1108 while (charIndex > 0) { 1109 lastChar = text.charAt(--charIndex); 1110 if (lastChar == '&') { 1111 return Character.DIRECTIONALITY_WHITESPACE; 1112 } 1113 if (lastChar == ';') { 1114 break; 1115 } 1116 } 1117 charIndex = initialCharIndex; 1118 lastChar = ';'; 1119 return Character.DIRECTIONALITY_OTHER_NEUTRALS; 1120 } 1121 } 1122}