157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio/*
257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * Copyright (C) 2013 The Android Open Source Project
357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *
457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * Licensed under the Apache License, Version 2.0 (the "License");
557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * you may not use this file except in compliance with the License.
657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * You may obtain a copy of the License at
757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *
857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *      http://www.apache.org/licenses/LICENSE-2.0
957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *
1057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * Unless required by applicable law or agreed to in writing, software
1157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * distributed under the License is distributed on an "AS IS" BASIS,
1257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * See the License for the specific language governing permissions and
1457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * limitations under the License.
1557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio */
1657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
178c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Megliopackage android.text;
1857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
1957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglioimport android.view.View;
2057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
2157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglioimport static android.text.TextDirectionHeuristics.FIRSTSTRONG_LTR;
2257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
2357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglioimport java.util.Locale;
2457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
2557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio/**
2657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * Utility class for formatting text for display in a potentially opposite-directionality context
2757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * without garbling. The directionality of the context is set at formatter creation and the
28f156cb31c0928cc739c4dc79813d13f92389877eRoozbeh Pournader * directionality of the text can be either estimated or passed in when known.
29e442662330c972aa96986e6f09305be836b6a7f4Scott Main *
30e442662330c972aa96986e6f09305be836b6a7f4Scott Main * <p>To support versions lower than {@link android.os.Build.VERSION_CODES#JELLY_BEAN_MR2},
31e442662330c972aa96986e6f09305be836b6a7f4Scott Main * you can use the support library's {@link android.support.v4.text.BidiFormatter} class.
32e442662330c972aa96986e6f09305be836b6a7f4Scott Main *
33e442662330c972aa96986e6f09305be836b6a7f4Scott Main * <p>These APIs provide the following functionality:
3457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * <p>
3557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * 1. Bidi Wrapping
3657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * When text in one language is mixed into a document in another, opposite-directionality language,
378c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio * e.g. when an English business name is embedded in some Hebrew text, both the inserted string
3857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * and the text surrounding it may be displayed incorrectly unless the inserted string is explicitly
3957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * separated from the surrounding text in a "wrapper" that:
4057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * <p>
418c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio * - Declares its directionality so that the string is displayed correctly. This can be done in
428c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio *   Unicode bidi formatting codes by {@link #unicodeWrap} and similar methods.
4357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * <p>
4457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * - Isolates the string's directionality, so it does not unduly affect the surrounding content.
4557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   Currently, this can only be done using invisible Unicode characters of the same direction as
4657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   the context (LRM or RLM) in addition to the directionality declaration above, thus "resetting"
4757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   the directionality to that of the context. The "reset" may need to be done at both ends of the
4857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   string. Without "reset" after the string, the string will "stick" to a number or logically
4957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   separate opposite-direction text that happens to follow it in-line (even if separated by
5057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   neutral content like spaces and punctuation). Without "reset" before the string, the same can
5157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   happen there, but only with more opposite-direction text, not a number. One approach is to
5257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   "reset" the direction only after each string, on the theory that if the preceding opposite-
5357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   direction text is itself bidi-wrapped, the "reset" after it will prevent the sticking. (Doing
5457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   the "reset" only before each string definitely does not work because we do not want to require
5557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   bidi-wrapping numbers, and a bidi-wrapped opposite-direction string could be followed by a
5657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   number.) Still, the safest policy is to do the "reset" on both ends of each string, since RTL
5757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   message translations often contain untranslated Latin-script brand names and technical terms,
5857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   and one of these can be followed by a bidi-wrapped inserted value. On the other hand, when one
5957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   has such a message, it is best to do the "reset" manually in the message translation itself,
6057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   since the message's opposite-direction text could be followed by an inserted number, which we
6157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   would not bidi-wrap anyway. Thus, "reset" only after the string is the current default. As an
6257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   alternative to "reset", recent additions to the HTML, CSS, and Unicode standards allow the
6357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   isolation to be part of the directionality declaration. This form of isolation is better than
6457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   "reset" because it takes less space, does not require knowing the context directionality, has a
6557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   gentler effect than "reset", and protects both ends of the string. However, we do not yet allow
6657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio *   using it because required platforms do not yet support it.
6757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * <p>
6857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * Providing these wrapping services is the basic purpose of the bidi formatter.
6957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * <p>
7057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * 2. Directionality estimation
7157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * How does one know whether a string about to be inserted into surrounding text has the same
7257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * directionality? Well, in many cases, one knows that this must be the case when writing the code
7357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * doing the insertion, e.g. when a localized message is inserted into a localized page. In such
7457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * cases there is no need to involve the bidi formatter at all. In some other cases, it need not be
7557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * the same as the context, but is either constant (e.g. urls are always LTR) or otherwise known.
7657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * In the remaining cases, e.g. when the string is user-entered or comes from a database, the
7757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * language of the string (and thus its directionality) is not known a priori, and must be
7857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * estimated at run-time. The bidi formatter can do this automatically using the default
7957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * first-strong estimation algorithm. It can also be configured to use a custom directionality
8057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio * estimation object.
8157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio */
8257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Megliopublic final class BidiFormatter {
8357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
8457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
8557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * The default text direction heuristic.
8657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
8757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static TextDirectionHeuristic DEFAULT_TEXT_DIRECTION_HEURISTIC = FIRSTSTRONG_LTR;
8857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
8957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
9057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Unicode "Left-To-Right Embedding" (LRE) character.
9157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
9257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final char LRE = '\u202A';
9357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
9457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
9557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Unicode "Right-To-Left Embedding" (RLE) character.
9657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
9757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final char RLE = '\u202B';
9857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
9957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
10057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Unicode "Pop Directional Formatting" (PDF) character.
10157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
10257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final char PDF = '\u202C';
10357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
10457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
10557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *  Unicode "Left-To-Right Mark" (LRM) character.
10657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
10757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final char LRM = '\u200E';
10857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
10957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /*
11057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Unicode "Right-To-Left Mark" (RLM) character.
11157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
11257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final char RLM = '\u200F';
11357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
11457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /*
11557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * String representation of LRM
11657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
11757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final String LRM_STRING = Character.toString(LRM);
11857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
11957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /*
12057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * String representation of RLM
12157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
12257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final String RLM_STRING = Character.toString(RLM);
12357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
12457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
12557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Empty string constant.
12657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
12757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final String EMPTY_STRING = "";
12857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
12957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
13057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * A class for building a BidiFormatter with non-default options.
13157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
13257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public static final class Builder {
1339889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        private boolean mIsRtlContext;
1349889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        private int mFlags;
1359889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        private TextDirectionHeuristic mTextDirectionHeuristic;
13657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
13757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
13857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Constructor.
13957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         *
14057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
14157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        public Builder() {
14257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            initialize(isRtlLocale(Locale.getDefault()));
14357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
14457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
14557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
14657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Constructor.
14757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         *
14857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * @param rtlContext Whether the context directionality is RTL.
14957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
15057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        public Builder(boolean rtlContext) {
15157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            initialize(rtlContext);
15257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
15357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
15457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
15557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Constructor.
15657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         *
15757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * @param locale The context locale.
15857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
15957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        public Builder(Locale locale) {
16057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            initialize(isRtlLocale(locale));
16157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
16257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
16357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
16457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Initializes the builder with the given context directionality and default options.
16557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         *
16657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * @param isRtlContext Whether the context is RTL or not.
16757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
16857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private void initialize(boolean isRtlContext) {
1699889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio            mIsRtlContext = isRtlContext;
1709889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio            mTextDirectionHeuristic = DEFAULT_TEXT_DIRECTION_HEURISTIC;
1719889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio            mFlags = DEFAULT_FLAGS;
17257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
17357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
17457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
17557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Specifies whether the BidiFormatter to be built should also "reset" directionality before
176007262e0aef80b30034b9ddd702ea252d9a71f4cRoozbeh Pournader         * a string being bidi-wrapped, not just after it. The default is true.
17757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
17857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        public Builder stereoReset(boolean stereoReset) {
17957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            if (stereoReset) {
1809889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio                mFlags |= FLAG_STEREO_RESET;
18157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            } else {
1829889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio                mFlags &= ~FLAG_STEREO_RESET;
18357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
18457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return this;
18557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
18657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
18757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
18857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Specifies the default directionality estimation algorithm to be used by the BidiFormatter.
18957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * By default, uses the first-strong heuristic.
19057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         *
19157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * @param heuristic the {@code TextDirectionHeuristic} to use.
19257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * @return the builder itself.
19357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
19457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        public Builder setTextDirectionHeuristic(TextDirectionHeuristic heuristic) {
1959889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio            mTextDirectionHeuristic = heuristic;
19657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return this;
19757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
19857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
19957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private static BidiFormatter getDefaultInstanceFromContext(boolean isRtlContext) {
20057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return isRtlContext ? DEFAULT_RTL_INSTANCE : DEFAULT_LTR_INSTANCE;
20157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
20257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
20357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
20457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * @return A BidiFormatter with the specified options.
20557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
20657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        public BidiFormatter build() {
2079889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio            if (mFlags == DEFAULT_FLAGS &&
2089889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio                    mTextDirectionHeuristic == DEFAULT_TEXT_DIRECTION_HEURISTIC) {
2099889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio                return getDefaultInstanceFromContext(mIsRtlContext);
21057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
2119889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio            return new BidiFormatter(mIsRtlContext, mFlags, mTextDirectionHeuristic);
21257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
21357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
21457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
2154d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio    //
21657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final int FLAG_STEREO_RESET = 2;
21757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static final int DEFAULT_FLAGS = FLAG_STEREO_RESET;
21857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
2194d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio    private static final BidiFormatter DEFAULT_LTR_INSTANCE = new BidiFormatter(
2204d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            false /* LTR context */,
2214d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            DEFAULT_FLAGS,
2224d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            DEFAULT_TEXT_DIRECTION_HEURISTIC);
2234d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio
2244d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio    private static final BidiFormatter DEFAULT_RTL_INSTANCE = new BidiFormatter(
2254d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            true /* RTL context */,
2264d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            DEFAULT_FLAGS,
2274d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            DEFAULT_TEXT_DIRECTION_HEURISTIC);
22857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
2299889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio    private final boolean mIsRtlContext;
2309889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio    private final int mFlags;
2319889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio    private final TextDirectionHeuristic mDefaultTextDirectionHeuristic;
23257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
23357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
234cd5af79bf96912f996397d0c06ceb02d11390238Fabrice Di Meglio     * Factory for creating an instance of BidiFormatter for the default locale directionality.
235cd5af79bf96912f996397d0c06ceb02d11390238Fabrice Di Meglio     *
236cd5af79bf96912f996397d0c06ceb02d11390238Fabrice Di Meglio     */
237cd5af79bf96912f996397d0c06ceb02d11390238Fabrice Di Meglio    public static BidiFormatter getInstance() {
238cd5af79bf96912f996397d0c06ceb02d11390238Fabrice Di Meglio        return new Builder().build();
239cd5af79bf96912f996397d0c06ceb02d11390238Fabrice Di Meglio    }
240cd5af79bf96912f996397d0c06ceb02d11390238Fabrice Di Meglio
241cd5af79bf96912f996397d0c06ceb02d11390238Fabrice Di Meglio    /**
24257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Factory for creating an instance of BidiFormatter given the context directionality.
24357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
24457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param rtlContext Whether the context directionality is RTL.
24557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
24657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public static BidiFormatter getInstance(boolean rtlContext) {
24757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        return new Builder(rtlContext).build();
24857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
24957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
25057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
25157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Factory for creating an instance of BidiFormatter given the context locale.
25257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
25357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param locale The context locale.
25457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
25557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public static BidiFormatter getInstance(Locale locale) {
25657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        return new Builder(locale).build();
25757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
25857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
/**
 * Stores the given options; instances are obtained through the factories or the builder.
 *
 * @param isRtlContext Whether the context directionality is RTL or not.
 * @param flags The option flags.
 * @param heuristic The default text direction heuristic.
 */
private BidiFormatter(boolean isRtlContext, int flags, TextDirectionHeuristic heuristic) {
    this.mIsRtlContext = isRtlContext;
    this.mFlags = flags;
    this.mDefaultTextDirectionHeuristic = heuristic;
}
26957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
27057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
27157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @return Whether the context directionality is RTL
27257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
27357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public boolean isRtlContext() {
2749889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        return mIsRtlContext;
27557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
27657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
27757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
27857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @return Whether directionality "reset" should also be done before a string being
27957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * bidi-wrapped, not just after it.
28057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
28157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public boolean getStereoReset() {
2829889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        return (mFlags & FLAG_STEREO_RESET) != 0;
28357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
28457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
28557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
28657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the
28757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * overall or the exit directionality of a given string is opposite to the context directionality.
28857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Putting this after the string (including its directionality declaration wrapping) prevents it
28957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * from "sticking" to other opposite-directionality text or a number appearing after it inline
29057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * with only neutral content in between. Otherwise returns the empty string. While the exit
29157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * directionality is determined by scanning the end of the string, the overall directionality is
2928c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio     * given explicitly by a heuristic to estimate the {@code str}'s directionality.
29357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
29457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param str String after which the mark may need to appear.
29557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
29657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *                  directionality.
29757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
29857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *     else, the empty string.
2998c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio     *
3008c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio     * @hide
30157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
30257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public String markAfter(String str, TextDirectionHeuristic heuristic) {
30357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
30457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        // getExitDir() is called only if needed (short-circuit).
3059889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        if (!mIsRtlContext && (isRtl || getExitDir(str) == DIR_RTL)) {
30657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return LRM_STRING;
30757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
3089889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        if (mIsRtlContext && (!isRtl || getExitDir(str) == DIR_LTR)) {
30957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return RLM_STRING;
31057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
31157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        return EMPTY_STRING;
31257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
31357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
31457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
31557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the
31657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * overall or the entry directionality of a given string is opposite to the context
31757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * directionality. Putting this before the string (including its directionality declaration
3188c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio     * wrapping) prevents it from "sticking" to other opposite-directionality text appearing before
3198c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio     * it inline with only neutral content in between. Otherwise returns the empty string. While the
32057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * entry directionality is determined by scanning the beginning of the string, the overall
3218c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio     * directionality is given explicitly by a heuristic to estimate the {@code str}'s directionality.
32257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
32357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param str String before which the mark may need to appear.
32457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
32557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *                  directionality.
32657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
32757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *     else, the empty string.
3288c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio     *
3298c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio     * @hide
33057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
33157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public String markBefore(String str, TextDirectionHeuristic heuristic) {
33257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
33357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        // getEntryDir() is called only if needed (short-circuit).
3349889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        if (!mIsRtlContext && (isRtl || getEntryDir(str) == DIR_RTL)) {
33557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return LRM_STRING;
33657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
3379889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        if (mIsRtlContext && (!isRtl || getEntryDir(str) == DIR_LTR)) {
33857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return RLM_STRING;
33957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
34057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        return EMPTY_STRING;
34157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
34257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
34357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
34457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Estimates the directionality of a string using the default text direction heuristic.
34557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
34657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param str String whose directionality is to be estimated.
34757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @return true if {@code str}'s estimated overall directionality is RTL. Otherwise returns
34857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *          false.
34957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
35057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public boolean isRtl(String str) {
3519889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        return mDefaultTextDirectionHeuristic.isRtl(str, 0, str.length());
35257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
35357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
35457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
35557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Formats a string of given directionality for use in plain-text output of the context
35657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * directionality, so an opposite-directionality string is neither garbled nor garbles its
3578c08fc9a86b28c54968bde2dcbb46d744f1b6201Fabrice Di Meglio     * surroundings. This makes use of Unicode bidi formatting characters.
35857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * <p>
35957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * The algorithm: In case the given directionality doesn't match the context directionality, wraps
36057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * the string with Unicode bidi formatting characters: RLE+{@code str}+PDF for RTL text, or
36157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * LRE+{@code str}+PDF for LTR text.
36257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * <p>
36357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * If {@code isolate}, directionally isolates the string so that it does not garble its
36457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * surroundings. Currently, this is done by "resetting" the directionality after the string by
36557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when
366007262e0aef80b30034b9ddd702ea252d9a71f4cRoozbeh Pournader     * either the overall directionality or the exit directionality of the string is opposite to
367007262e0aef80b30034b9ddd702ea252d9a71f4cRoozbeh Pournader     * that of the context. Unless the formatter was built using
368007262e0aef80b30034b9ddd702ea252d9a71f4cRoozbeh Pournader     * {@link Builder#stereoReset(boolean)} with a {@code false} argument, also prepends a Unicode
369007262e0aef80b30034b9ddd702ea252d9a71f4cRoozbeh Pournader     * bidi mark matching the context directionality when either the overall directionality or the
370007262e0aef80b30034b9ddd702ea252d9a71f4cRoozbeh Pournader     * entry directionality of the string is opposite to that of the context. Note that as opposed
371007262e0aef80b30034b9ddd702ea252d9a71f4cRoozbeh Pournader     * to the overall directionality, the entry and exit directionalities are determined from the
372007262e0aef80b30034b9ddd702ea252d9a71f4cRoozbeh Pournader     * string itself.
37357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * <p>
37457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Does *not* do HTML-escaping.
37557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
37657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param str The input string.
37757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param heuristic The algorithm to be used to estimate the string's overall direction.
378e442662330c972aa96986e6f09305be836b6a7f4Scott Main     *        See {@link TextDirectionHeuristics} for pre-defined heuristics.
37957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
38057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *     content around it
381f156cb31c0928cc739c4dc79813d13f92389877eRoozbeh Pournader     * @return Input string after applying the above processing. {@code null} if {@code str} is
382f156cb31c0928cc739c4dc79813d13f92389877eRoozbeh Pournader     *     {@code null}.
38357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
38457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public String unicodeWrap(String str, TextDirectionHeuristic heuristic, boolean isolate) {
385f156cb31c0928cc739c4dc79813d13f92389877eRoozbeh Pournader        if (str == null) return null;
38657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
38757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        StringBuilder result = new StringBuilder();
38857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        if (getStereoReset() && isolate) {
38957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            result.append(markBefore(str,
39057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    isRtl ? TextDirectionHeuristics.RTL : TextDirectionHeuristics.LTR));
39157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
3929889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        if (isRtl != mIsRtlContext) {
39357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            result.append(isRtl ? RLE : LRE);
39457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            result.append(str);
39557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            result.append(PDF);
39657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        } else {
39757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            result.append(str);
39857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
39957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        if (isolate) {
40057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            result.append(markAfter(str,
40157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    isRtl ? TextDirectionHeuristics.RTL : TextDirectionHeuristics.LTR));
40257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
40357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        return result.toString();
40457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
40557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
40657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
40757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Operates like {@link #unicodeWrap(String, TextDirectionHeuristic, boolean)}, but assumes
40857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * {@code isolate} is true.
40957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
41057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param str The input string.
41157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param heuristic The algorithm to be used to estimate the string's overall direction.
412e442662330c972aa96986e6f09305be836b6a7f4Scott Main     *        See {@link TextDirectionHeuristics} for pre-defined heuristics.
41357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @return Input string after applying the above processing.
41457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
41557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public String unicodeWrap(String str, TextDirectionHeuristic heuristic) {
41657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        return unicodeWrap(str, heuristic, true /* isolate */);
41757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
41857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
41957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
42057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Operates like {@link #unicodeWrap(String, TextDirectionHeuristic, boolean)}, but uses the
42157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * formatter's default direction estimation algorithm.
42257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
42357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param str The input string.
42457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
42557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *     content around it
42657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @return Input string after applying the above processing.
42757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
42857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public String unicodeWrap(String str, boolean isolate) {
4299889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        return unicodeWrap(str, mDefaultTextDirectionHeuristic, isolate);
43057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
43157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
43257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
43357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Operates like {@link #unicodeWrap(String, TextDirectionHeuristic, boolean)}, but uses the
43457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * formatter's default direction estimation algorithm and assumes {@code isolate} is true.
43557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
43657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param str The input string.
43757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @return Input string after applying the above processing.
43857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
43957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    public String unicodeWrap(String str) {
4409889d559f3b0500a16d89b8831837c415adcc98bFabrice Di Meglio        return unicodeWrap(str, mDefaultTextDirectionHeuristic, true /* isolate */);
44157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
44257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
44357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
44457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Helper method to return true if the Locale directionality is RTL.
44557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
44657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param locale The Locale whose directionality will be checked to be RTL or LTR
44757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @return true if the {@code locale} directionality is RTL. False otherwise.
44857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
44957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static boolean isRtlLocale(Locale locale) {
45057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        return (TextUtils.getLayoutDirectionFromLocale(locale) == View.LAYOUT_DIRECTION_RTL);
45157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
45257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
45357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
45457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Enum for directionality type.
45557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
4564d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio    private static final int DIR_LTR = -1;
4574d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio    private static final int DIR_UNKNOWN = 0;
4584d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio    private static final int DIR_RTL = +1;
45957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
46057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
46157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Returns the directionality of the last character with strong directionality in the string, or
4624d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio     * DIR_UNKNOWN if none was encountered. For efficiency, actually scans backwards from the end of
46357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * the string. Treats a non-BN character between an LRE/RLE/LRO/RLO and its matching PDF as a
46457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * strong character, LTR after LRE/LRO, and RTL after RLE/RLO. The results are undefined for a
46557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * string containing unbalanced LRE/RLE/LRO/RLO/PDF characters. The intended use is to check
46657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * whether a logically separate item that starts with a number or a character of the string's
46757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * exit directionality and follows this string inline (not counting any neutral characters in
46857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * between) would "stick" to it in an opposite-directionality context, thus being displayed in
46957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * an incorrect position. An LRM or RLM character (the one of the context's directionality)
47057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * between the two will prevent such sticking.
47157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
47257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param str the string to check.
47357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
4744d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio    private static int getExitDir(String str) {
47557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        return new DirectionalityEstimator(str, false /* isHtml */).getExitDir();
47657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
47757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
47857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
47957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * Returns the directionality of the first character with strong directionality in the string,
4804d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio     * or DIR_UNKNOWN if none was encountered. Treats a non-BN character between an
48157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * LRE/RLE/LRO/RLO and its matching PDF as a strong character, LTR after LRE/LRO, and RTL after
48257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * RLE/RLO. The results are undefined for a string containing unbalanced LRE/RLE/LRO/RLO/PDF
48357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * characters. The intended use is to check whether a logically separate item that ends with a
48457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * character of the string's entry directionality and precedes the string inline (not counting
48557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * any neutral characters in between) would "stick" to it in an opposite-directionality context,
48657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * thus being displayed in an incorrect position. An LRM or RLM character (the one of the
48757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * context's directionality) between the two will prevent such sticking.
48857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
48957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * @param str the string to check.
49057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
4914d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio    private static int getEntryDir(String str) {
49257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        return new DirectionalityEstimator(str, false /* isHtml */).getEntryDir();
49357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
49457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
49557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    /**
49657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     * An object that estimates the directionality of a given string by various methods.
49757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     *
49857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio     */
49957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    private static class DirectionalityEstimator {
50057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
50157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        // Internal static variables and constants.
50257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
/**
 * Number of entries in the bidi character class cache: the Character.getDirectionality()
 * results for the lowest DIR_TYPE_CACHE_SIZE codepoints are precomputed into an array for
 * speed. The 0x700 value is designed to leave all the European and Near Eastern languages in
 * the cache. It can be reduced to 0x180, restricting the cache to the Western European
 * languages.
 */
private static final int DIR_TYPE_CACHE_SIZE = 0x700;

/**
 * The bidi character class cache, indexed by codepoint.
 */
private static final byte[] DIR_TYPE_CACHE = new byte[DIR_TYPE_CACHE_SIZE];

static {
    // Fill the cache once, when the class is initialized.
    for (int codePoint = 0; codePoint < DIR_TYPE_CACHE.length; ++codePoint) {
        DIR_TYPE_CACHE[codePoint] = Character.getDirectionality(codePoint);
    }
}
52357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
52457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        // Internal instance variables.
52557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
52657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
52757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * The text to be scanned.
52857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
52957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private final String text;
53057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
53157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
53257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Whether the text to be scanned is to be treated as HTML, i.e. skipping over tags and
53357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * entities when looking for the next / preceding dir type.
53457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
53557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private final boolean isHtml;
53657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
53757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
53857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * The length of the text in chars.
53957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
54057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private final int length;
54157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
54257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
54357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * The current position in the text.
54457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
54557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private int charIndex;
54657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
54757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
54857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * The char encountered by the last dirTypeForward or dirTypeBackward call. If it
54957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * encountered a supplementary codepoint, this contains a char that is not a valid
55057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * codepoint. This is ok, because this member is only used to detect some well-known ASCII
55157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * syntax, e.g. "http://" and the beginning of an HTML tag or entity.
55257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
55357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private char lastChar;
55457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
/**
 * Creates an estimator over the given text and records the text's length in chars.
 *
 * @param text The string to scan.
 * @param isHtml Whether the text to be scanned is to be treated as HTML, i.e. skipping over
 *     tags and entities.
 */
DirectionalityEstimator(String text, boolean isHtml) {
    this.length = text.length();
    this.text = text;
    this.isHtml = isHtml;
}
56757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
56857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
56957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Returns the directionality of the first character with strong directionality in the
5704d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio         * string, or DIR_UNKNOWN if none was encountered. Treats a non-BN character between an
57157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * LRE/RLE/LRO/RLO and its matching PDF as a strong character, LTR after LRE/LRO, and RTL
57257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * after RLE/RLO. The results are undefined for a string containing unbalanced
57357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * LRE/RLE/LRO/RLO/PDF characters.
57457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
5754d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio        int getEntryDir() {
57657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // The reason for this method name, as opposed to getFirstStrongDir(), is that
57757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // "first strong" is a commonly used description of Unicode's estimation algorithm,
57857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // but the two must treat formatting characters quite differently. Thus, we are staying
57957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // away from both "first" and "last" in these method names to avoid confusion.
58057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            charIndex = 0;
58157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            int embeddingLevel = 0;
5824d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            int embeddingLevelDir = DIR_UNKNOWN;
58357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            int firstNonEmptyEmbeddingLevel = 0;
58457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            while (charIndex < length && firstNonEmptyEmbeddingLevel == 0) {
58557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                switch (dirTypeForward()) {
58657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
58757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
58857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        ++embeddingLevel;
5894d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                        embeddingLevelDir = DIR_LTR;
59057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
59157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
59257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
59357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        ++embeddingLevel;
5944d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                        embeddingLevelDir = DIR_RTL;
59557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
59657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
59757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        --embeddingLevel;
59857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        // To restore embeddingLevelDir to its previous value, we would need a
59957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        // stack, which we want to avoid. Thus, at this point we do not know the
60057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        // current embedding's directionality.
6014d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                        embeddingLevelDir = DIR_UNKNOWN;
60257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
60357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
60457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
60557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
60657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (embeddingLevel == 0) {
6074d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                            return DIR_LTR;
60857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
60957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        firstNonEmptyEmbeddingLevel = embeddingLevel;
61057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
61157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
61257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
61357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (embeddingLevel == 0) {
6144d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                            return DIR_RTL;
61557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
61657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        firstNonEmptyEmbeddingLevel = embeddingLevel;
61757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
61857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    default:
61957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        firstNonEmptyEmbeddingLevel = embeddingLevel;
62057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
62157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
62257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
62357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
62457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // We have either found a non-empty embedding or scanned the entire string finding
62557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // neither a non-empty embedding nor a strong character outside of an embedding.
62657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            if (firstNonEmptyEmbeddingLevel == 0) {
62757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                // We have not found a non-empty embedding. Thus, the string contains neither a
62857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                // non-empty embedding nor a strong character outside of an embedding.
6294d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                return DIR_UNKNOWN;
63057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
63157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
63257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // We have found a non-empty embedding.
6334d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            if (embeddingLevelDir != DIR_UNKNOWN) {
63457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                // We know the directionality of the non-empty embedding.
63557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                return embeddingLevelDir;
63657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
63757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
63857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // We do not remember the directionality of the non-empty embedding we found. So, we go
63957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // backwards to find the start of the non-empty embedding and get its directionality.
64057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            while (charIndex > 0) {
64157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                switch (dirTypeBackward()) {
64257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
64357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
64457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (firstNonEmptyEmbeddingLevel == embeddingLevel) {
6454d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                            return DIR_LTR;
64657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
64757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        --embeddingLevel;
64857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
64957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
65057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
65157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (firstNonEmptyEmbeddingLevel == embeddingLevel) {
6524d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                            return DIR_RTL;
65357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
65457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        --embeddingLevel;
65557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
65657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
65757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        ++embeddingLevel;
65857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
65957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
66057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
66157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // We should never get here.
6624d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            return DIR_UNKNOWN;
66357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
66457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
66557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
66657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Returns the directionality of the last character with strong directionality in the
6674d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio         * string, or DIR_UNKNOWN if none was encountered. For efficiency, actually scans backwards
66857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * from the end of the string. Treats a non-BN character between an LRE/RLE/LRO/RLO and its
66957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * matching PDF as a strong character, LTR after LRE/LRO, and RTL after RLE/RLO. The results
67057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * are undefined for a string containing unbalanced LRE/RLE/LRO/RLO/PDF characters.
67157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
6724d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio        int getExitDir() {
67357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // The reason for this method name, as opposed to getLastStrongDir(), is that "last
67457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // strong" sounds like the exact opposite of "first strong", which is a commonly used
67557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // description of Unicode's estimation algorithm (getUnicodeDir() above), but the two
67657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // must treat formatting characters quite differently. Thus, we are staying away from
67757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // both "first" and "last" in these method names to avoid confusion.
67857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            charIndex = length;
67957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            int embeddingLevel = 0;
68057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            int lastNonEmptyEmbeddingLevel = 0;
68157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            while (charIndex > 0) {
68257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                switch (dirTypeBackward()) {
68357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
68457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (embeddingLevel == 0) {
6854d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                            return DIR_LTR;
68657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
68757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (lastNonEmptyEmbeddingLevel == 0) {
68857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                            lastNonEmptyEmbeddingLevel = embeddingLevel;
68957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
69057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
69157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
69257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
69357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (lastNonEmptyEmbeddingLevel == embeddingLevel) {
6944d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                            return DIR_LTR;
69557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
69657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        --embeddingLevel;
69757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
69857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
69957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
70057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (embeddingLevel == 0) {
7014d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                            return DIR_RTL;
70257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
70357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (lastNonEmptyEmbeddingLevel == 0) {
70457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                            lastNonEmptyEmbeddingLevel = embeddingLevel;
70557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
70657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
70757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
70857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
70957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (lastNonEmptyEmbeddingLevel == embeddingLevel) {
7104d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio                            return DIR_RTL;
71157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
71257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        --embeddingLevel;
71357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
71457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
71557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        ++embeddingLevel;
71657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
71757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
71857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
71957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    default:
72057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        if (lastNonEmptyEmbeddingLevel == 0) {
72157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                            lastNonEmptyEmbeddingLevel = embeddingLevel;
72257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        }
72357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                        break;
72457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
72557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
7264d1937b731d304fffb23a6f37778a84ef09a97e6Fabrice Di Meglio            return DIR_UNKNOWN;
72757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
72857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
72957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        // Internal methods
73057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
73157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
73257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Gets the bidi character class, i.e. Character.getDirectionality(), of a given char, using
73357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * a cache for speed. Not designed for supplementary codepoints, whose results we do not
73457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * cache.
73557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
73657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private static byte getCachedDirectionality(char c) {
73757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return c < DIR_TYPE_CACHE_SIZE ? DIR_TYPE_CACHE[c] : Character.getDirectionality(c);
73857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
73957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
74057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
74157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Returns the Character.DIRECTIONALITY_... value of the next codepoint and advances
74257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * charIndex. If isHtml, and the codepoint is '<' or '&', advances through the tag/entity,
74357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * and returns Character.DIRECTIONALITY_WHITESPACE. For an entity, it would be best to
74457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * figure out the actual character, and return its dirtype, but treating it as whitespace is
74557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * good enough for our purposes.
74657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         *
74757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * @throws java.lang.IndexOutOfBoundsException if called when charIndex >= length or < 0.
74857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
74957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        byte dirTypeForward() {
75057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            lastChar = text.charAt(charIndex);
75157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            if (Character.isHighSurrogate(lastChar)) {
75257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                int codePoint = Character.codePointAt(text, charIndex);
75357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                charIndex += Character.charCount(codePoint);
75457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                return Character.getDirectionality(codePoint);
75557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
75657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            charIndex++;
75757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            byte dirType = getCachedDirectionality(lastChar);
75857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            if (isHtml) {
75957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                // Process tags and entities.
76057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                if (lastChar == '<') {
76157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    dirType = skipTagForward();
76257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                } else if (lastChar == '&') {
76357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    dirType = skipEntityForward();
76457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
76557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
76657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return dirType;
76757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
76857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
76957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
77057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Returns the Character.DIRECTIONALITY_... value of the preceding codepoint and advances
77157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * charIndex backwards. If isHtml, and the codepoint is the end of a complete HTML tag or
77257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * entity, advances over the whole tag/entity and returns
77357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Character.DIRECTIONALITY_WHITESPACE. For an entity, it would be best to figure out the
77457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * actual character, and return its dirtype, but treating it as whitespace is good enough
77557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * for our purposes.
77657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         *
77757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * @throws java.lang.IndexOutOfBoundsException if called when charIndex > length or <= 0.
77857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
77957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        byte dirTypeBackward() {
78057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            lastChar = text.charAt(charIndex - 1);
78157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            if (Character.isLowSurrogate(lastChar)) {
78257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                int codePoint = Character.codePointBefore(text, charIndex);
78357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                charIndex -= Character.charCount(codePoint);
78457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                return Character.getDirectionality(codePoint);
78557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
78657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            charIndex--;
78757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            byte dirType = getCachedDirectionality(lastChar);
78857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            if (isHtml) {
78957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                // Process tags and entities.
79057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                if (lastChar == '>') {
79157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    dirType = skipTagBackward();
79257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                } else if (lastChar == ';') {
79357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    dirType = skipEntityBackward();
79457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
79557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
79657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return dirType;
79757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
79857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
79957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
80057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Advances charIndex forward through an HTML tag (after the opening &lt; has already been
80157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * read) and returns Character.DIRECTIONALITY_WHITESPACE. If there is no matching &gt;,
80257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * does not change charIndex and returns Character.DIRECTIONALITY_OTHER_NEUTRALS (for the
80357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * &lt; that hadn't been part of a tag after all).
80457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
80557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private byte skipTagForward() {
80657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            int initialCharIndex = charIndex;
80757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            while (charIndex < length) {
80857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                lastChar = text.charAt(charIndex++);
80957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                if (lastChar == '>') {
81057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    // The end of the tag.
81157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    return Character.DIRECTIONALITY_WHITESPACE;
81257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
81357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                if (lastChar == '"' || lastChar == '\'') {
81457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    // Skip over a quoted attribute value inside the tag.
81557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    char quote = lastChar;
81657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    while (charIndex < length && (lastChar = text.charAt(charIndex++)) != quote) {}
81757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
81857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
81957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // The original '<' wasn't the start of a tag after all.
82057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            charIndex = initialCharIndex;
82157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            lastChar = '<';
82257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return Character.DIRECTIONALITY_OTHER_NEUTRALS;
82357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
82457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
82557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
82657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Advances charIndex backward through an HTML tag (after the closing &gt; has already been
82757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * read) and returns Character.DIRECTIONALITY_WHITESPACE. If there is no matching &lt;, does
82857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * not change charIndex and returns Character.DIRECTIONALITY_OTHER_NEUTRALS (for the &gt;
82957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * that hadn't been part of a tag after all). Nevertheless, the running time for calling
83057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * skipTagBackward() in a loop remains linear in the size of the text, even for a text like
83157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * "&gt;&gt;&gt;&gt;", because skipTagBackward() also stops looking for a matching &lt;
83257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * when it encounters another &gt;.
83357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
83457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private byte skipTagBackward() {
83557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            int initialCharIndex = charIndex;
83657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            while (charIndex > 0) {
83757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                lastChar = text.charAt(--charIndex);
83857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                if (lastChar == '<') {
83957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    // The start of the tag.
84057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    return Character.DIRECTIONALITY_WHITESPACE;
84157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
84257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                if (lastChar == '>') {
84357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    break;
84457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
84557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                if (lastChar == '"' || lastChar == '\'') {
84657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    // Skip over a quoted attribute value inside the tag.
84757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    char quote = lastChar;
84857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    while (charIndex > 0 && (lastChar = text.charAt(--charIndex)) != quote) {}
84957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
85057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
85157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            // The original '>' wasn't the end of a tag after all.
85257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            charIndex = initialCharIndex;
85357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            lastChar = '>';
85457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return Character.DIRECTIONALITY_OTHER_NEUTRALS;
85557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
85657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
85757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
85857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Advances charIndex forward through an HTML character entity tag (after the opening
85957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * &amp; has already been read) and returns Character.DIRECTIONALITY_WHITESPACE. It would be
86057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * best to figure out the actual character and return its dirtype, but this is good enough.
86157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
86257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private byte skipEntityForward() {
86357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            while (charIndex < length && (lastChar = text.charAt(charIndex++)) != ';') {}
86457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return Character.DIRECTIONALITY_WHITESPACE;
86557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
86657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio
86757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        /**
86857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Advances charIndex backward through an HTML character entity tag (after the closing ;
86957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * has already been read) and returns Character.DIRECTIONALITY_WHITESPACE. It would be best
87057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * to figure out the actual character and return its dirtype, but this is good enough.
87157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * If there is no matching &amp;, does not change charIndex and returns
87257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * Character.DIRECTIONALITY_OTHER_NEUTRALS (for the ';' that did not start an entity after
87357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * all). Nevertheless, the running time for calling skipEntityBackward() in a loop remains
87457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * linear in the size of the text, even for a text like ";;;;;;;", because skipTagBackward()
87557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         * also stops looking for a matching &amp; when it encounters another ;.
87657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio         */
87757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        private byte skipEntityBackward() {
87857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            int initialCharIndex = charIndex;
87957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            while (charIndex > 0) {
88057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                lastChar = text.charAt(--charIndex);
88157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                if (lastChar == '&') {
88257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    return Character.DIRECTIONALITY_WHITESPACE;
88357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
88457a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                if (lastChar == ';') {
88557a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                    break;
88657a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio                }
88757a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            }
88857a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            charIndex = initialCharIndex;
88957a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            lastChar = ';';
89057a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio            return Character.DIRECTIONALITY_OTHER_NEUTRALS;
89157a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio        }
89257a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio    }
89357a85740d721caf8dcd94a545b2dd920e8e84e01Fabrice Di Meglio}