BidiFormatter.java revision 77f6bada6f88acea9025afce3eb0127d45411798
1ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/*
2ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Copyright (C) 2013 The Android Open Source Project
3ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *
4ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Licensed under the Apache License, Version 2.0 (the "License");
5ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * you may not use this file except in compliance with the License.
6ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * You may obtain a copy of the License at
7ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *
8ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *      http://www.apache.org/licenses/LICENSE-2.0
9ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *
10ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Unless required by applicable law or agreed to in writing, software
115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * distributed under the License is distributed on an "AS IS" BASIS,
125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * See the License for the specific language governing permissions and
14ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * limitations under the License.
15ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */
16b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
17b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianpackage android.support.v4.text.bidi;
18b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
19b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianimport android.support.v4.text.TextDirectionHeuristicCompat;
20ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangimport android.support.v4.text.TextDirectionHeuristicsCompat;
21b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianimport android.support.v4.text.TextUtilsCompat;
22b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianimport android.support.v4.view.ViewCompat;
23ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
24ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangimport java.util.Locale;
25b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
26ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangimport static android.support.v4.text.TextDirectionHeuristicsCompat.FIRSTSTRONG_LTR;
27ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
28ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/**
29ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Utility class for formatting text for display in a potentially opposite-directionality context
30ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * without garbling. The directionality of the context is set at formatter creation and the
31ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality of the text can be either estimated or passed in when known. Provides the
32ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * following functionality:
33ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p>
34ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 1. Bidi Wrapping
35ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * When text in one language is mixed into a document in another, opposite-directionality language,
36ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * e.g. when an English business name is embedded in a Hebrew web page, both the inserted string
37ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * and the text surrounding it may be displayed incorrectly unless the inserted string is explicitly
38ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * separated from the surrounding text in a "wrapper" that:
39ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p>
40ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * - Declares its directionality so that the string is displayed correctly. This can be done in HTML
41ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   markup (e.g. a 'span dir="rtl"' element) by {@link #spanWrap} and similar methods, or - only in
42ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   contexts where markup can't be used - in Unicode bidi formatting codes by {@link #unicodeWrap}
43ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   and similar methods.
44ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p>
45ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * - Isolates the string's directionality, so it does not unduly affect the surrounding content.
46ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   Currently, this can only be done using invisible Unicode characters of the same direction as
47ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   the context (LRM or RLM) in addition to the directionality declaration above, thus "resetting"
48ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   the directionality to that of the context. The "reset" may need to be done at both ends of the
49ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   string. Without "reset" after the string, the string will "stick" to a number or logically
50ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   separate opposite-direction text that happens to follow it in-line (even if separated by
51ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   neutral content like spaces and punctuation). Without "reset" before the string, the same can
52ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   happen there, but only with more opposite-direction text, not a number. One approach is to
53ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   "reset" the direction only after each string, on the theory that if the preceding opposite-
54ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   direction text is itself bidi-wrapped, the "reset" after it will prevent the sticking. (Doing
55ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   the "reset" only before each string definitely does not work because we do not want to require
56ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   bidi-wrapping numbers, and a bidi-wrapped opposite-direction string could be followed by a
571184aebb761cbeac9124c37189a80a1a58f04b6bhkuang *   number.) Still, the safest policy is to do the "reset" on both ends of each string, since RTL
58ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   message translations often contain untranslated Latin-script brand names and technical terms,
59ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   and one of these can be followed by a bidi-wrapped inserted value. On the other hand, when one
60ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   has such a message, it is best to do the "reset" manually in the message translation itself,
61ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   since the message's opposite-direction text could be followed by an inserted number, which we
 *   would not bidi-wrap anyway. Even so, "reset" on both ends is the current default. In an
63ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   alternative to "reset", recent additions to the HTML, CSS, and Unicode standards allow the
64ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   isolation to be part of the directionality declaration. This form of isolation is better than
65ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   "reset" because it takes less space, does not require knowing the context directionality, has a
66ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   gentler effect than "reset", and protects both ends of the string. However, we do not yet allow
67ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *   using it because required platforms do not yet support it.
68ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p>
69ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Providing these wrapping services is the basic purpose of the bidi formatter.
70ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p>
71ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 2. Directionality estimation
72ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * How does one know whether a string about to be inserted into surrounding text has the same
73ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality? Well, in many cases, one knows that this must be the case when writing the code
74ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * doing the insertion, e.g. when a localized message is inserted into a localized page. In such
75ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * cases there is no need to involve the bidi formatter at all. In some other cases, it need not be
76ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * the same as the context, but is either constant (e.g. urls are always LTR) or otherwise known.
77ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * In the remaining cases, e.g. when the string is user-entered or comes from a database, the
78ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * language of the string (and thus its directionality) is not known a priori, and must be
79ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * estimated at run-time. The bidi formatter can do this automatically using the default
80ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * first-strong estimation algorithm. It can also be configured to use a custom directionality
81ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * estimation object.
82ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p>
83ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 3. Escaping
84ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * When wrapping plain text - i.e. text that is not already HTML or HTML-escaped - in HTML markup,
85ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * the text must first be HTML-escaped to prevent XSS attacks and other nasty business. This of
86ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * course is always true, but the escaping can not be done after the string has already been wrapped
87ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * in markup, so the bidi formatter also serves as a last chance and includes escaping services.
88ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * <p>
89ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Thus, in a single call, the formatter will escape the input string as specified, determine its
90ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * directionality, and wrap it as necessary. It is then up to the caller to insert the return value
91ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * in the output.
92ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */
93ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangpublic final class BidiFormatter {
94ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
95ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
96ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * The default text direction heuristic.
97ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
98ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static TextDirectionHeuristicCompat DEFAULT_TEXT_DIRECTION_HEURISTIC = FIRSTSTRONG_LTR;
99ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Unicode "Left-To-Right Embedding" (LRE) character.
102ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final char LRE = '\u202A';
104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Unicode "Right-To-Left Embedding" (RLE) character.
107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final char RLE = '\u202B';
109ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Unicode "Pop Directional Formatting" (PDF) character.
112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
113ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final char PDF = '\u202C';
114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *  Unicode "Left-To-Right Mark" (LRM) character.
117ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
118ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final char LRM = '\u200E';
119ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
120ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /*
121ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Unicode "Right-To-Left Mark" (RLM) character.
122ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
123ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final char RLM = '\u200F';
124ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
125ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /*
126ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * String representation of LRM
127ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
128ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final String LRM_STRING = Character.toString(LRM);
129ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
130ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /*
131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * String representation of RLM
132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
133ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final String RLM_STRING = Character.toString(RLM);
134ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
135ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
136ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * "ltr" string constant.
137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
138ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final String LTR_STRING = "ltr";
139ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * "rtl" string constant.
142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
143ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final String RTL_STRING = "rtl";
144ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
145ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
146ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * "dir=\"ltr\"" string constant.
147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final String DIR_LTR_STRING = "dir=\"ltr\"";
149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
150ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * "dir=\"rtl\"" string constant.
152ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
153ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final String DIR_RTL_STRING = "dir=\"rtl\"";
154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
156ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * "right" string constant.
157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final String RIGHT = "right";
159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * "left" string constant.
162ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final String LEFT = "left";
164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
165ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Empty string constant.
167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
168ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final String EMPTY_STRING = "";
169ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
171ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * A class for building a BidiFormatter with non-default options.
172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
173ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public static final class Builder {
174ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        private boolean isRtlContext;
175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        private int flags;
176ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        private TextDirectionHeuristicCompat textDirectionHeuristicCompat;
177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
178ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
179ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Constructor.
180ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         *
181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
182ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        public Builder() {
183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            initialize(isRtlLocale(Locale.getDefault()));
184ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
185ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
187ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Constructor.
188ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         *
189ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * @param rtlContext Whether the context directionality is RTL.
190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
191ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        public Builder(boolean rtlContext) {
192ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            initialize(rtlContext);
193ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
194ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
195ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Constructor.
197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         *
198ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * @param locale The context locale.
199ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
200ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        public Builder(Locale locale) {
201ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            initialize(isRtlLocale(locale));
202ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
203ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
204ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
205ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Initializes the builder with the given context directionality and default options.
206ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         *
207ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * @param isRtlContext Whether the context is RTL or not.
208ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
209ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        private void initialize(boolean isRtlContext) {
210ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this.isRtlContext = isRtlContext;
211ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            textDirectionHeuristicCompat = DEFAULT_TEXT_DIRECTION_HEURISTIC;
212ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this.flags = DEFAULT_FLAGS;
213ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
214ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
215ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
216ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Specifies whether the BidiFormatter to be built should also "reset" directionality before
217ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * a string being bidi-wrapped, not just after it. The default is false.
218ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        public Builder stereoReset(boolean stereoReset) {
220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            if (stereoReset) {
221ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                flags |= FLAG_STEREO_RESET;
222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            } else {
223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                flags &= ~FLAG_STEREO_RESET;
224ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
225ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return this;
226ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
227ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
228ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
229ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Specifies the default directionality estimation algorithm to be used by the BidiFormatter.
230ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * By default, uses the first-strong heuristic.
231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         *
232ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * @param heuristic the {@code TextDirectionHeuristic} to use.
233ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * @return the builder itself.
234ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
235ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        public Builder setTextDirectionHeuristic(TextDirectionHeuristicCompat heuristic) {
236ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this.textDirectionHeuristicCompat = heuristic;
237ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return this;
238ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
239ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
240ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        private static BidiFormatter getDefaultInstanceFromContext(boolean isRtlContext) {
241ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return isRtlContext ? DEFAULT_RTL_INSTANCE : DEFAULT_LTR_INSTANCE;
242ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
243ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
245ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * @return A BidiFormatter with the specified options.
246ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        public BidiFormatter build() {
248ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            if (flags == DEFAULT_FLAGS &&
249ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    textDirectionHeuristicCompat == DEFAULT_TEXT_DIRECTION_HEURISTIC) {
250ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                return getDefaultInstanceFromContext(isRtlContext);
251ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
252ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return new BidiFormatter(isRtlContext, flags, textDirectionHeuristicCompat);
253ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
254ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
255ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
256ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    //
2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    private static final int FLAG_STEREO_RESET = 2;
258ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final int DEFAULT_FLAGS = FLAG_STEREO_RESET;
259ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
260ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final BidiFormatter DEFAULT_LTR_INSTANCE = new BidiFormatter(
261ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            false /* LTR context */,
262ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            DEFAULT_FLAGS,
263ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            DEFAULT_TEXT_DIRECTION_HEURISTIC);
264ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
265ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final BidiFormatter DEFAULT_RTL_INSTANCE = new BidiFormatter(
266ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            true /* RTL context */,
267ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            DEFAULT_FLAGS,
268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            DEFAULT_TEXT_DIRECTION_HEURISTIC);
269ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
270ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private final boolean isRtlContext;
271ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private final int flags;
272ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private final TextDirectionHeuristicCompat defaultTextDirectionHeuristicCompat;
273ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
274ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
275ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Factory for creating an instance of BidiFormatter given the context directionality.
276ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
277ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param rtlContext Whether the context directionality is RTL.
278ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
279ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public static BidiFormatter getInstance(boolean rtlContext) {
280ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return new Builder(rtlContext).build();
281ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
282ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
283ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
284ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Factory for creating an instance of BidiFormatter given the context locale.
285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
286ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param locale The context locale.
287ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
288ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public static BidiFormatter getInstance(Locale locale) {
289ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return new Builder(locale).build();
290ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
291ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/**
 * Stores the given configuration. Private: instances are obtained through {@link Builder}
 * or the {@code getInstance} factories.
 *
 * @param isRtlContext Whether the context directionality is RTL or not.
 * @param flags The option flags.
 * @param heuristic The default text direction heuristic.
 */
private BidiFormatter(boolean isRtlContext, int flags, TextDirectionHeuristicCompat heuristic) {
    this.defaultTextDirectionHeuristicCompat = heuristic;
    this.flags = flags;
    this.isRtlContext = isRtlContext;
}
302ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
303ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
304ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return Whether the context directionality is RTL
305ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
306ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public boolean isRtlContext() {
307ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return isRtlContext;
308ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
309ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
310ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
311ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return Whether directionality "reset" should also be done before a string being
312ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * bidi-wrapped, not just after it.
313ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
314ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public boolean getStereoReset() {
315ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return (flags & FLAG_STEREO_RESET) != 0;
316ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
317ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
318ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
319ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Returns "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" if it is LTR.
320ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
321ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str String whose directionality is to be estimated.
322ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" otherwise.
323ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
324ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String dirAttrValue(String str) {
325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return dirAttrValue(isRtl(str));
326ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
327ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
328ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
329ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Operates like {@link #dirAttrValue(String)}, but uses a given heuristic to estimate the
330ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * {@code str}'s directionality.
331ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
332ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str String whose directionality is to be estimated.
333ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
334ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *                  directionality.
335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" otherwise.
336ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
337ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String dirAttrValue(String str, TextDirectionHeuristicCompat heuristic) {
338ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return dirAttrValue(heuristic.isRtl(str, 0, str.length()));
339ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
340ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
341ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
342ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Returns "rtl" if the given directionality is RTL, and "ltr" if it is LTR.
343ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
344ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param isRtl Whether the directionality is RTL or not.
345ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return "rtl" if the given directionality is RTL, and "ltr" otherwise.
346ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
347ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String dirAttrValue(boolean isRtl) {
348ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return isRtl ? RTL_STRING : LTR_STRING;
349ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
350ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
351ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
352ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Returns "dir=\"ltr\"" or "dir=\"rtl\"", depending on {@code str}'s estimated directionality,
353ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * if it is not the same as the context directionality. Otherwise, returns the empty string.
354ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
355ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str String whose directionality is to be estimated.
356ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR
357ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *     context; else, the empty string.
358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
359ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String dirAttr(String str) {
360ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return dirAttr(isRtl(str));
361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
362ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
363ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
364ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Operates like {@link #dirAttr(String)}, but uses a given heuristic to estimate the
365ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * {@code str}'s directionality.
366ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
367ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str String whose directionality is to be estimated.
368ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
369ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *                  directionality.
370ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR
371ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *     context; else, the empty string.
372ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
373ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String dirAttr(String str, TextDirectionHeuristicCompat heuristic) {
374b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        return dirAttr(heuristic.isRtl(str, 0, str.length()));
375ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
376ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
377ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
378ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Returns "dir=\"ltr\"" or "dir=\"rtl\"", depending on the given directionality, if it is not
379ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * the same as the context directionality. Otherwise, returns the empty string.
380ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
381ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param isRtl Whether the directionality is RTL or not
382ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR
383ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *     context; else, the empty string.
384ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
385ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String dirAttr(boolean isRtl) {
386ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return (isRtl != isRtlContext) ? (isRtl ? DIR_RTL_STRING :  DIR_LTR_STRING) : EMPTY_STRING;
387ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
388ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
389ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
390ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the
391ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * overall or the exit directionality of a given string is opposite to the context directionality.
392ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Putting this after the string (including its directionality declaration wrapping) prevents it
393ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * from "sticking" to other opposite-directionality text or a number appearing after it inline
394ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * with only neutral content in between. Otherwise returns the empty string. While the exit
395ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * directionality is determined by scanning the end of the string, the overall directionality is
396ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * given explicitly in {@code dir}.
397ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
398ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str String after which the mark may need to appear.
399b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
400ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *     else, the empty string.
401ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
402ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String markAfter(String str) {
403ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return markAfter(str, defaultTextDirectionHeuristicCompat);
404ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
405ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
406ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
407ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Operates like {@link #markAfter(String)}, but uses a given heuristic to estimate the
408ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * {@code str}'s directionality.
409ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
41091037db265ecdd914a26e056cf69207b4f50924ehkuang     * @param str String after which the mark may need to appear.
411ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
412ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *                  directionality.
413ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
414ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *     else, the empty string.
4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     */
416ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String markAfter(String str, TextDirectionHeuristicCompat heuristic) {
417ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
418ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        // getExitDir() is called only if needed (short-circuit).
419ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (!isRtlContext && (isRtl || getExitDir(str) == DIR_RTL)) {
420ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return LRM_STRING;
421ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
422ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (isRtlContext && (!isRtl || getExitDir(str) == DIR_LTR)) {
423ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return RLM_STRING;
424ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
425ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return EMPTY_STRING;
426ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
427ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
428ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
429ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the
430ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * overall or the entry directionality of a given string is opposite to the context
431ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * directionality. Putting this before the string (including its directionality declaration
432ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * wrapping) prevents it from "sticking" to other opposite-directionality text appearing before it
433ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * inline with only neutral content in between. Otherwise returns the empty string. While the
434ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * entry directionality is determined by scanning the beginning of the string, the overall
435ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * directionality is given explicitly in {@code dir}.
436ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
437ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str String before which the mark may need to appear.
438ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
439ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *     else, the empty string.
440ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
441ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String markBefore(String str) {
442ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return markBefore(str, defaultTextDirectionHeuristicCompat);
443ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
444ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
445ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
446ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Operates like {@link #markBefore(String)}, but uses a given heuristic to estimate the
447ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * {@code str}'s directionality.
448ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
449ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str String before which the mark may need to appear.
450ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
451ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *                  directionality.
452ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
453ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *     else, the empty string.
454ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
455ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String markBefore(String str, TextDirectionHeuristicCompat heuristic) {
456ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
457ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        // getEntryDir() is called only if needed (short-circuit).
458ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (!isRtlContext && (isRtl || getEntryDir(str) == DIR_RTL)) {
459ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return LRM_STRING;
460ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
461ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (isRtlContext && (!isRtl || getEntryDir(str) == DIR_LTR)) {
462ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return RLM_STRING;
463ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
464ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return EMPTY_STRING;
465ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
466ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
467ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
468ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Returns the Unicode bidi mark matching the context directionality (LRM for LTR context
469ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * directionality, RLM for RTL context directionality).
470ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
471ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String mark() {
472ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return isRtlContext ? RLM_STRING : LRM_STRING;
473ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
474ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
475ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
476ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Returns "right" for RTL context directionality. Otherwise for LTR context directionality
477ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * returns "left".
478ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
479ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String startEdge() {
480ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return isRtlContext  ? RIGHT : LEFT;
481ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
482ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
483ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
484ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Returns "left" for RTL context directionality. Otherwise for LTR context directionality
485ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * returns "right".
486ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
487ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String endEdge() {
488ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return isRtlContext ? LEFT : RIGHT;
489ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
491ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
492ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Estimates the directionality of a string using the default text direction heuristic.
493ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
494ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str String whose directionality is to be estimated.
495ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return true if {@code str}'s estimated overall directionality is RTL. Otherwise returns
496ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *          false.
497ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
498ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public boolean isRtl(String str) {
499ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return defaultTextDirectionHeuristicCompat.isRtl(str, 0, str.length());
500ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
501ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
502ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
503ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Formats a given string of unknown directionality for use in HTML output of the context
504ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * directionality, so an opposite-directionality string is neither garbled nor garbles its
505ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * surroundings.
506ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * <p>
507ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * The algorithm: estimates the directionality of the given string using the given heuristic.
508ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * If the directionality is known, pass TextDirectionHeuristics.LTR or RTL for heuristic.
509ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * In case its directionality doesn't match the context directionality, wraps it with a 'span'
510ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * element and adds a "dir" attribute (either 'dir=\"rtl\"' or 'dir=\"ltr\"').
511ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * <p>
512ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * If {@code isolate}, directionally isolates the string so that it does not garble its
513ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * surroundings. Currently, this is done by "resetting" the directionality after the string by
514ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when
515ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * either the overall directionality or the exit directionality of the string is opposite to that
516ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * of the context. If the formatter was built using {@link Builder#stereoReset(boolean)} and
517ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * passing "true" as an argument, also prepends a Unicode bidi mark matching the context
518ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * directionality when either the overall directionality or the entry directionality of the
519ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * string is opposite to that of the context.
520ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * <p>
521ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
522ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str The input string.
523ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param heuristic The algorithm to be used to estimate the string's overall direction.
524ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
525ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *     content around it.
526ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return Input string after applying the above processing.
527ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
528ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String spanWrap(String str, TextDirectionHeuristicCompat heuristic, boolean isolate) {
529ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
530ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        String origStr = str;
531ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        str = TextUtilsCompat.htmlEncode(str);
532ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
533ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        StringBuilder result = new StringBuilder();
534ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (getStereoReset() && isolate) {
535ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            result.append(markBefore(origStr,
536ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    isRtl ? TextDirectionHeuristicsCompat.RTL : TextDirectionHeuristicsCompat.LTR));
537ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
538ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (isRtl != isRtlContext) {
539ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            result.append("<span ").append(dirAttr(isRtl)).append('>').append(str).append("</span>");
540ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        } else {
541ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            result.append(str);
542ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
543ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (isolate) {
544ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            result.append(markAfter(origStr,
545ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    isRtl ? TextDirectionHeuristicsCompat.RTL : TextDirectionHeuristicsCompat.LTR));
546ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
547ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return result.toString();
548ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
549ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
550ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
551ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Operates like {@link #spanWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but assumes
552ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * {@code isolate} is true.
553ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
554ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str The input string.
555ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param heuristic The algorithm to be used to estimate the string's overall direction.
556ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return Input string after applying the above processing.
557ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
558ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String spanWrap(String str, TextDirectionHeuristicCompat heuristic) {
559ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return spanWrap(str, heuristic, true /* isolate */);
560ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
561ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
562ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
563ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Operates like {@link #spanWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but uses the
564ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * formatter's default direction estimation algorithm.
565ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
566ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str The input string.
567ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
568ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *     content around it
569ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return Input string after applying the above processing.
570ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
571ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String spanWrap(String str, boolean isolate) {
572ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return spanWrap(str, defaultTextDirectionHeuristicCompat, isolate);
573ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
574ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
575ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
576ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Operates like {@link #spanWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but uses the
577ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * formatter's default direction estimation algorithm and assumes {@code isolate} is true.
578ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
579ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str The input string.
580ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return Input string after applying the above processing.
581ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
582ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String spanWrap(String str) {
583ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return spanWrap(str, defaultTextDirectionHeuristicCompat, true /* isolate */);
584ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
585ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
586ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
587ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Formats a string of given directionality for use in plain-text output of the context
588ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * directionality, so an opposite-directionality string is neither garbled nor garbles its
589ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * surroundings. As opposed to {@link #spanWrap}, this makes use of Unicode bidi
590ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * formatting characters. In HTML, its *only* valid use is inside of elements that do not allow
591ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * markup, e.g. the 'option' and 'title' elements.
592ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * <p>
593ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * The algorithm: In case the given directionality doesn't match the context directionality, wraps
594ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * the string with Unicode bidi formatting characters: RLE+{@code str}+PDF for RTL text, or
595ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * LRE+{@code str}+PDF for LTR text.
596ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * <p>
597ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * If {@code isolate}, directionally isolates the string so that it does not garble its
598ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * surroundings. Currently, this is done by "resetting" the directionality after the string by
599ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when
600ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * either the overall directionality or the exit directionality of the string is opposite to that
601ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * of the context. If the formatter was built using {@link Builder#stereoReset(boolean)} and
602ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * passing "true" as an argument, also prepends a Unicode bidi mark matching the context
603ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * directionality when either the overall directionality or the entry directionality of the
604ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * string is opposite to that of the context. Note that as opposed to the overall
605ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * directionality, the entry and exit directionalities are determined from the string itself.
606ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * <p>
607ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Does *not* do HTML-escaping.
6081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang     *
6091184aebb761cbeac9124c37189a80a1a58f04b6bhkuang     * @param str The input string.
610b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * @param heuristic The algorithm to be used to estimate the string's overall direction.
611b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
612b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     *     content around it
613b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * @return Input string after applying the above processing.
614ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
6151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    public String unicodeWrap(String str, TextDirectionHeuristicCompat heuristic, boolean isolate) {
616ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
617ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        StringBuilder result = new StringBuilder();
618ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (getStereoReset() && isolate) {
6191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            result.append(markBefore(str,
620ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    isRtl ? TextDirectionHeuristicsCompat.RTL : TextDirectionHeuristicsCompat.LTR));
621ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
622ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (isRtl != isRtlContext) {
623b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            result.append(isRtl ? RLE : LRE);
624ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            result.append(str);
625ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            result.append(PDF);
626b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        } else {
627b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            result.append(str);
628ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
629b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        if (isolate) {
630ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            result.append(markAfter(str,
631b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                    isRtl ? TextDirectionHeuristicsCompat.RTL : TextDirectionHeuristicsCompat.LTR));
632ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
633ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return result.toString();
634ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
635b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
636b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    /**
637b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * Operates like {@link #unicodeWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but assumes
638b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * {@code isolate} is true.
639ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
640ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str The input string.
641b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * @param heuristic The algorithm to be used to estimate the string's overall direction.
642b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * @return Input string after applying the above processing.
643b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     */
644ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String unicodeWrap(String str, TextDirectionHeuristicCompat heuristic) {
645ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return unicodeWrap(str, heuristic, true /* isolate */);
646ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
647ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
648ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
6495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     * Operates like {@link #unicodeWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but uses the
6505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     * formatter's default direction estimation algorithm.
6515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     *
6525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     * @param str The input string.
6535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
6545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     *     content around it
6555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     * @return Input string after applying the above processing.
6565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     */
657b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    public String unicodeWrap(String str, boolean isolate) {
658ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return unicodeWrap(str, defaultTextDirectionHeuristicCompat, isolate);
659ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
660ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
661ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
662ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Operates like {@link #unicodeWrap(String, android.support.v4.text.TextDirectionHeuristicCompat, boolean)}, but uses the
663b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * formatter's default direction estimation algorithm and assumes {@code isolate} is true.
664b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     *
665ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str The input string.
6661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang     * @return Input string after applying the above processing.
667ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
668ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    public String unicodeWrap(String str) {
669ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return unicodeWrap(str, defaultTextDirectionHeuristicCompat, true /* isolate */);
670ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
671ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
672ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
673ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Helper method to return true if the Locale directionality is RTL.
674ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
6755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     * @param locale The Locale whose directionality will be checked to be RTL or LTR
676ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @return true if the {@code locale} directionality is RTL. False otherwise.
677ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
678ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static boolean isRtlLocale(Locale locale) {
679ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return (TextUtilsCompat.getLayoutDirectionFromLocale(locale) == ViewCompat.LAYOUT_DIRECTION_RTL);
680ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
681ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
682ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
683ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * Enum for directionality type.
684ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
685ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final int DIR_LTR = -1;
686ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final int DIR_UNKNOWN = 0;
687ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static final int DIR_RTL = +1;
688ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
689ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
690b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * Returns the directionality of the last character with strong directionality in the string, or
691b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian     * DIR_UNKNOWN if none was encountered. For efficiency, actually scans backwards from the end of
692ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * the string. Treats a non-BN character between an LRE/RLE/LRO/RLO and its matching PDF as a
6931184aebb761cbeac9124c37189a80a1a58f04b6bhkuang     * strong character, LTR after LRE/LRO, and RTL after RLE/RLO. The results are undefined for a
694ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * string containing unbalanced LRE/RLE/LRO/RLO/PDF characters. The intended use is to check
695ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * whether a logically separate item that starts with a number or a character of the string's
696ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * exit directionality and follows this string inline (not counting any neutral characters in
697ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * between) would "stick" to it in an opposite-directionality context, thus being displayed in
698ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * an incorrect position. An LRM or RLM character (the one of the context's directionality)
699ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * between the two will prevent such sticking.
700ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
701ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str the string to check.
702ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
703b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    private static int getExitDir(String str) {
704ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return new DirectionalityEstimator(str, false /* isHtml */).getExitDir();
7051184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    }
7065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
707ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    /**
7085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang     * Returns the directionality of the first character with strong directionality in the string,
709ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * or DIR_UNKNOWN if none was encountered. Treats a non-BN character between an
710ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * LRE/RLE/LRO/RLO and its matching PDF as a strong character, LTR after LRE/LRO, and RTL after
711ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * RLE/RLO. The results are undefined for a string containing unbalanced LRE/RLE/LRO/RLO/PDF
712ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * characters. The intended use is to check whether a logically separate item that ends with a
713ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * character of the string's entry directionality and precedes the string inline (not counting
714ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * any neutral characters in between) would "stick" to it in an opposite-directionality context,
715ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * thus being displayed in an incorrect position. An LRM or RLM character (the one of the
716ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * context's directionality) between the two will prevent such sticking.
717ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     *
718ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     * @param str the string to check.
719ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang     */
720ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static int getEntryDir(String str) {
721ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        return new DirectionalityEstimator(str, false /* isHtml */).getEntryDir();
722ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
72391037db265ecdd914a26e056cf69207b4f50924ehkuang
    /**
     * An object that estimates the directionality of a given string, reporting either its entry
     * directionality ({@code getEntryDir()}) or its exit directionality ({@code getExitDir()}).
     */
728ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    private static class DirectionalityEstimator {
729ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
730b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        // Internal static variables and constants.
731b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
732b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        /**
733b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian         * Size of the bidi character class cache. The results of the Character.getDirectionality()
7341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang         * calls on the lowest DIR_TYPE_CACHE_SIZE codepoints are kept in an array for speed.
735ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * The 0x700 value is designed to leave all the European and Near Eastern languages in the
736ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * cache. It can be reduced to 0x180, restricting the cache to the Western European
737ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * languages.
738ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
739ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        private static final int DIR_TYPE_CACHE_SIZE = 0x700;
740ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
741ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
742ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * The bidi character class cache.
743ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
744ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        private static final byte DIR_TYPE_CACHE[];
745ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
746ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        static {
747ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            DIR_TYPE_CACHE = new byte[DIR_TYPE_CACHE_SIZE];
748ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            for (int i = 0; i < DIR_TYPE_CACHE_SIZE; i++) {
749ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                DIR_TYPE_CACHE[i] = Character.getDirectionality(i);
750ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
751ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
752ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
753ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        // Internal instance variables.
754ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
        /**
         * The text to be scanned. Set once by the constructor.
         */
        private final String text;

        /**
         * Whether the text to be scanned is to be treated as HTML, i.e. whether tags and entities
         * are skipped over when looking for the next / preceding dir type.
         */
        private final boolean isHtml;

        /**
         * The length of the text in chars (i.e. {@code text.length()}, not a codepoint count).
         */
        private final int length;

        /**
         * The current scan position in the text, as a char index. Moved forward by
         * dirTypeForward() and backward by dirTypeBackward().
         */
        private int charIndex;

        /**
         * The char encountered by the last dirTypeForward or dirTypeBackward call. If that call
         * encountered a supplementary codepoint, this holds a single surrogate char, which is not
         * a valid codepoint on its own. This is ok, because this member is only used to detect
         * some well-known ASCII syntax, e.g. "http://" and the beginning of an HTML tag or entity.
         */
        private char lastChar;
783ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
784ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
785ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Constructor.
786ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         *
787ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * @param text The string to scan.
788ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * @param isHtml Whether the text to be scanned is to be treated as HTML, i.e. skipping over
789ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         *     tags and entities.
790ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
791ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        DirectionalityEstimator(String text, boolean isHtml) {
792ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this.text = text;
793ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this.isHtml = isHtml;
794ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            length = text.length();
795ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
796ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
797ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
798ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Returns the directionality of the first character with strong directionality in the
799ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * string, or DIR_UNKNOWN if none was encountered. Treats a non-BN character between an
800ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * LRE/RLE/LRO/RLO and its matching PDF as a strong character, LTR after LRE/LRO, and RTL
801ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * after RLE/RLO. The results are undefined for a string containing unbalanced
802ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * LRE/RLE/LRO/RLO/PDF characters.
803ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
804ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        int getEntryDir() {
805ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // The reason for this method name, as opposed to getFirstStrongDir(), is that
806ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // "first strong" is a commonly used description of Unicode's estimation algorithm,
807ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // but the two must treat formatting characters quite differently. Thus, we are staying
808ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // away from both "first" and "last" in these method names to avoid confusion.
809ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            charIndex = 0;
810ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            int embeddingLevel = 0;
811ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            int embeddingLevelDir = DIR_UNKNOWN;
812ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            int firstNonEmptyEmbeddingLevel = 0;
813ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            while (charIndex < length && firstNonEmptyEmbeddingLevel == 0) {
814ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                switch (dirTypeForward()) {
815ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
816ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
817ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        ++embeddingLevel;
818ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        embeddingLevelDir = DIR_LTR;
819ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
820ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
821ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
822ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        ++embeddingLevel;
823ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        embeddingLevelDir = DIR_RTL;
824ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
825ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
826ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        --embeddingLevel;
827ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        // To restore embeddingLevelDir to its previous value, we would need a
828ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        // stack, which we want to avoid. Thus, at this point we do not know the
829ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        // current embedding's directionality.
830ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        embeddingLevelDir = DIR_UNKNOWN;
831ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
832ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
833ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
834ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
835ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        if (embeddingLevel == 0) {
836ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                            return DIR_LTR;
837ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        }
838ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        firstNonEmptyEmbeddingLevel = embeddingLevel;
839ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
840ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
841ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
842ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        if (embeddingLevel == 0) {
8431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                            return DIR_RTL;
844ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        }
845ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        firstNonEmptyEmbeddingLevel = embeddingLevel;
846ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
847ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    default:
848ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        firstNonEmptyEmbeddingLevel = embeddingLevel;
849ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
850ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                }
851ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
852ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
853ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // We have either found a non-empty embedding or scanned the entire string finding
854ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // neither a non-empty embedding nor a strong character outside of an embedding.
855ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            if (firstNonEmptyEmbeddingLevel == 0) {
856ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                // We have not found a non-empty embedding. Thus, the string contains neither a
8575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                // non-empty embedding nor a strong character outside of an embedding.
858ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                return DIR_UNKNOWN;
8595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            }
860ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
861ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // We have found a non-empty embedding.
862ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            if (embeddingLevelDir != DIR_UNKNOWN) {
863ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                // We know the directionality of the non-empty embedding.
864ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                return embeddingLevelDir;
865ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
866ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
867ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // We do not remember the directionality of the non-empty embedding we found. So, we go
868ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // backwards to find the start of the non-empty embedding and get its directionality.
869ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            while (charIndex > 0) {
870ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                switch (dirTypeBackward()) {
871ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
872b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
873b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        if (firstNonEmptyEmbeddingLevel == embeddingLevel) {
874b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                            return DIR_LTR;
875b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        }
876b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        --embeddingLevel;
877b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        break;
8781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
879ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
880ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        if (firstNonEmptyEmbeddingLevel == embeddingLevel) {
881ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                            return DIR_RTL;
882ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        }
883ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        --embeddingLevel;
884ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
885ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
886ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        ++embeddingLevel;
887ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
888ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                }
889ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
890ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // We should never get here.
891ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return DIR_UNKNOWN;
892ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
893ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
894ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
895ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Returns the directionality of the last character with strong directionality in the
896ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * string, or DIR_UNKNOWN if none was encountered. For efficiency, actually scans backwards
897ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * from the end of the string. Treats a non-BN character between an LRE/RLE/LRO/RLO and its
898ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * matching PDF as a strong character, LTR after LRE/LRO, and RTL after RLE/RLO. The results
89991037db265ecdd914a26e056cf69207b4f50924ehkuang         * are undefined for a string containing unbalanced LRE/RLE/LRO/RLO/PDF characters.
90091037db265ecdd914a26e056cf69207b4f50924ehkuang         */
90191037db265ecdd914a26e056cf69207b4f50924ehkuang        int getExitDir() {
902ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // The reason for this method name, as opposed to getLastStrongDir(), is that "last
903ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // strong" sounds like the exact opposite of "first strong", which is a commonly used
904ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // description of Unicode's estimation algorithm (getUnicodeDir() above), but the two
905ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // must treat formatting characters quite differently. Thus, we are staying away from
906ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            // both "first" and "last" in these method names to avoid confusion.
907ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            charIndex = length;
908ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            int embeddingLevel = 0;
909ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            int lastNonEmptyEmbeddingLevel = 0;
910ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            while (charIndex > 0) {
911ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                switch (dirTypeBackward()) {
912ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
913ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        if (embeddingLevel == 0) {
914ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                            return DIR_LTR;
915ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        }
916ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        if (lastNonEmptyEmbeddingLevel == 0) {
917ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                            lastNonEmptyEmbeddingLevel = embeddingLevel;
918ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        }
919ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
920ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
921ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
922ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        if (lastNonEmptyEmbeddingLevel == embeddingLevel) {
923ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                            return DIR_LTR;
924ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        }
925ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        --embeddingLevel;
926ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
927ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
928ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
929ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        if (embeddingLevel == 0) {
930ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                            return DIR_RTL;
931ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        }
932ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        if (lastNonEmptyEmbeddingLevel == 0) {
933ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                            lastNonEmptyEmbeddingLevel = embeddingLevel;
934ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        }
935ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
936b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
937b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
938b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        if (lastNonEmptyEmbeddingLevel == embeddingLevel) {
939b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                            return DIR_RTL;
940b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        }
941b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        --embeddingLevel;
9421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                        break;
943ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
944ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        ++embeddingLevel;
945ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
946ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
947ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
948ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                    default:
949ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        if (lastNonEmptyEmbeddingLevel == 0) {
950ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                            lastNonEmptyEmbeddingLevel = embeddingLevel;
951ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        }
952ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        break;
953ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                }
954ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
955ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return DIR_UNKNOWN;
956ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
957ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
958ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        // Internal methods
959ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
960ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
961ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Gets the bidi character class, i.e. Character.getDirectionality(), of a given char, using
962ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * a cache for speed. Not designed for supplementary codepoints, whose results we do not
963ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * cache.
964ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
965ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        private static byte getCachedDirectionality(char c) {
966ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            return c < DIR_TYPE_CACHE_SIZE ? DIR_TYPE_CACHE[c] : Character.getDirectionality(c);
967ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
968b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
969ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /**
970ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * Returns the Character.DIRECTIONALITY_... value of the next codepoint and advances
9711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang         * charIndex. If isHtml, and the codepoint is '<' or '&', advances through the tag/entity,
9721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang         * and returns Character.DIRECTIONALITY_WHITESPACE. For an entity, it would be best to
9731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang         * figure out the actual character, and return its dirtype, but treating it as whitespace is
9741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang         * good enough for our purposes.
975ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         *
976ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         * @throws java.lang.IndexOutOfBoundsException if called when charIndex >= length or < 0.
977ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang         */
978        byte dirTypeForward() {
979            lastChar = text.charAt(charIndex);
980            if (Character.isHighSurrogate(lastChar)) {
981                int codePoint = Character.codePointAt(text, charIndex);
982                charIndex += Character.charCount(codePoint);
983                return Character.getDirectionality(codePoint);
984            }
985            charIndex++;
986            byte dirType = getCachedDirectionality(lastChar);
987            if (isHtml) {
988                // Process tags and entities.
989                if (lastChar == '<') {
990                    dirType = skipTagForward();
991                } else if (lastChar == '&') {
992                    dirType = skipEntityForward();
993                }
994            }
995            return dirType;
996        }
997
998        /**
999         * Returns the Character.DIRECTIONALITY_... value of the preceding codepoint and advances
1000         * charIndex backwards. If isHtml, and the codepoint is the end of a complete HTML tag or
1001         * entity, advances over the whole tag/entity and returns
1002         * Character.DIRECTIONALITY_WHITESPACE. For an entity, it would be best to figure out the
1003         * actual character, and return its dirtype, but treating it as whitespace is good enough
1004         * for our purposes.
1005         *
1006         * @throws java.lang.IndexOutOfBoundsException if called when charIndex > length or <= 0.
1007         */
1008        byte dirTypeBackward() {
1009            lastChar = text.charAt(charIndex - 1);
1010            if (Character.isLowSurrogate(lastChar)) {
1011                int codePoint = Character.codePointBefore(text, charIndex);
1012                charIndex -= Character.charCount(codePoint);
1013                return Character.getDirectionality(codePoint);
1014            }
1015            charIndex--;
1016            byte dirType = getCachedDirectionality(lastChar);
1017            if (isHtml) {
1018                // Process tags and entities.
1019                if (lastChar == '>') {
1020                    dirType = skipTagBackward();
1021                } else if (lastChar == ';') {
1022                    dirType = skipEntityBackward();
1023                }
1024            }
1025            return dirType;
1026        }
1027
1028        /**
1029         * Advances charIndex forward through an HTML tag (after the opening &lt; has already been
1030         * read) and returns Character.DIRECTIONALITY_WHITESPACE. If there is no matching &gt;,
1031         * does not change charIndex and returns Character.DIRECTIONALITY_OTHER_NEUTRALS (for the
1032         * &lt; that hadn't been part of a tag after all).
1033         */
1034        private byte skipTagForward() {
1035            int initialCharIndex = charIndex;
1036            while (charIndex < length) {
1037                lastChar = text.charAt(charIndex++);
1038                if (lastChar == '>') {
1039                    // The end of the tag.
1040                    return Character.DIRECTIONALITY_WHITESPACE;
1041                }
1042                if (lastChar == '"' || lastChar == '\'') {
1043                    // Skip over a quoted attribute value inside the tag.
1044                    char quote = lastChar;
1045                    while (charIndex < length && (lastChar = text.charAt(charIndex++)) != quote) {}
1046                }
1047            }
1048            // The original '<' wasn't the start of a tag after all.
1049            charIndex = initialCharIndex;
1050            lastChar = '<';
1051            return Character.DIRECTIONALITY_OTHER_NEUTRALS;
1052        }
1053
1054        /**
1055         * Advances charIndex backward through an HTML tag (after the closing &gt; has already been
1056         * read) and returns Character.DIRECTIONALITY_WHITESPACE. If there is no matching &lt;, does
1057         * not change charIndex and returns Character.DIRECTIONALITY_OTHER_NEUTRALS (for the &gt;
1058         * that hadn't been part of a tag after all). Nevertheless, the running time for calling
1059         * skipTagBackward() in a loop remains linear in the size of the text, even for a text like
1060         * "&gt;&gt;&gt;&gt;", because skipTagBackward() also stops looking for a matching &lt;
1061         * when it encounters another &gt;.
1062         */
1063        private byte skipTagBackward() {
1064            int initialCharIndex = charIndex;
1065            while (charIndex > 0) {
1066                lastChar = text.charAt(--charIndex);
1067                if (lastChar == '<') {
1068                    // The start of the tag.
1069                    return Character.DIRECTIONALITY_WHITESPACE;
1070                }
1071                if (lastChar == '>') {
1072                    break;
1073                }
1074                if (lastChar == '"' || lastChar == '\'') {
1075                    // Skip over a quoted attribute value inside the tag.
1076                    char quote = lastChar;
1077                    while (charIndex > 0 && (lastChar = text.charAt(--charIndex)) != quote) {}
1078                }
1079            }
1080            // The original '>' wasn't the end of a tag after all.
1081            charIndex = initialCharIndex;
1082            lastChar = '>';
1083            return Character.DIRECTIONALITY_OTHER_NEUTRALS;
1084        }
1085
1086        /**
1087         * Advances charIndex forward through an HTML character entity tag (after the opening
1088         * &amp; has already been read) and returns Character.DIRECTIONALITY_WHITESPACE. It would be
1089         * best to figure out the actual character and return its dirtype, but this is good enough.
1090         */
1091        private byte skipEntityForward() {
1092            while (charIndex < length && (lastChar = text.charAt(charIndex++)) != ';') {}
1093            return Character.DIRECTIONALITY_WHITESPACE;
1094        }
1095
1096        /**
1097         * Advances charIndex backward through an HTML character entity tag (after the closing ;
1098         * has already been read) and returns Character.DIRECTIONALITY_WHITESPACE. It would be best
1099         * to figure out the actual character and return its dirtype, but this is good enough.
1100         * If there is no matching &amp;, does not change charIndex and returns
1101         * Character.DIRECTIONALITY_OTHER_NEUTRALS (for the ';' that did not start an entity after
1102         * all). Nevertheless, the running time for calling skipEntityBackward() in a loop remains
1103         * linear in the size of the text, even for a text like ";;;;;;;", because skipTagBackward()
1104         * also stops looking for a matching &amp; when it encounters another ;.
1105         */
1106        private byte skipEntityBackward() {
1107            int initialCharIndex = charIndex;
1108            while (charIndex > 0) {
1109                lastChar = text.charAt(--charIndex);
1110                if (lastChar == '&') {
1111                    return Character.DIRECTIONALITY_WHITESPACE;
1112                }
1113                if (lastChar == ';') {
1114                    break;
1115                }
1116            }
1117            charIndex = initialCharIndex;
1118            lastChar = ';';
1119            return Character.DIRECTIONALITY_OTHER_NEUTRALS;
1120        }
1121    }
1122}