1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.text;
18
19
20import android.view.View;
21
22import java.nio.CharBuffer;
23
/**
 * Predefined objects that implement {@link TextDirectionHeuristic}. Use these with
 * the {@link BidiFormatter#unicodeWrap unicodeWrap()} methods in {@link BidiFormatter}.
 * Note that these direction heuristics correspond to the text direction constants
 * provided in the {@link android.view.View} class for {@link android.view.View#setTextDirection
 * setTextDirection()}, such as {@link android.view.View#TEXT_DIRECTION_RTL}.
 * <p>To support versions lower than {@link android.os.Build.VERSION_CODES#JELLY_BEAN_MR2},
 * you can use the support library's {@link android.support.v4.text.TextDirectionHeuristicsCompat}
 * class.
 */
35public class TextDirectionHeuristics {
36
37    /**
38     * Always decides that the direction is left to right.
39     */
40    public static final TextDirectionHeuristic LTR =
41        new TextDirectionHeuristicInternal(null /* no algorithm */, false);
42
43    /**
44     * Always decides that the direction is right to left.
45     */
46    public static final TextDirectionHeuristic RTL =
47        new TextDirectionHeuristicInternal(null /* no algorithm */, true);
48
49    /**
50     * Determines the direction based on the first strong directional character, including bidi
51     * format chars, falling back to left to right if it finds none. This is the default behavior
52     * of the Unicode Bidirectional Algorithm.
53     */
54    public static final TextDirectionHeuristic FIRSTSTRONG_LTR =
55        new TextDirectionHeuristicInternal(FirstStrong.INSTANCE, false);
56
57    /**
58     * Determines the direction based on the first strong directional character, including bidi
59     * format chars, falling back to right to left if it finds none. This is similar to the default
60     * behavior of the Unicode Bidirectional Algorithm, just with different fallback behavior.
61     */
62    public static final TextDirectionHeuristic FIRSTSTRONG_RTL =
63        new TextDirectionHeuristicInternal(FirstStrong.INSTANCE, true);
64
65    /**
66     * If the text contains any strong right to left non-format character, determines that the
67     * direction is right to left, falling back to left to right if it finds none.
68     */
69    public static final TextDirectionHeuristic ANYRTL_LTR =
70        new TextDirectionHeuristicInternal(AnyStrong.INSTANCE_RTL, false);
71
72    /**
73     * Force the paragraph direction to the Locale direction. Falls back to left to right.
74     */
75    public static final TextDirectionHeuristic LOCALE = TextDirectionHeuristicLocale.INSTANCE;
76
77    /**
78     * State constants for taking care about true / false / unknown
79     */
80    private static final int STATE_TRUE = 0;
81    private static final int STATE_FALSE = 1;
82    private static final int STATE_UNKNOWN = 2;
83
84    /* Returns STATE_TRUE for strong RTL characters, STATE_FALSE for strong LTR characters, and
85     * STATE_UNKNOWN for everything else.
86     */
87    private static int isRtlCodePoint(int codePoint) {
88        switch (Character.getDirectionality(codePoint)) {
89            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
90                return STATE_FALSE;
91            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
92            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
93                return STATE_TRUE;
94            case Character.DIRECTIONALITY_UNDEFINED:
95                // Unassigned characters still have bidi direction, defined at:
96                // http://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedBidiClass.txt
97
98                if ((0x0590 <= codePoint && codePoint <= 0x08FF) ||
99                        (0xFB1D <= codePoint && codePoint <= 0xFDCF) ||
100                        (0xFDF0 <= codePoint && codePoint <= 0xFDFF) ||
101                        (0xFE70 <= codePoint && codePoint <= 0xFEFF) ||
102                        (0x10800 <= codePoint && codePoint <= 0x10FFF) ||
103                        (0x1E800 <= codePoint && codePoint <= 0x1EFFF)) {
104                    // Unassigned RTL character
105                    return STATE_TRUE;
106                } else if (
107                        // Potentially-unassigned Default_Ignorable. Ranges are from unassigned
108                        // characters that have Unicode property Other_Default_Ignorable_Code_Point
109                        // plus some enlargening to cover bidi isolates and simplify checks.
110                        (0x2065 <= codePoint && codePoint <= 0x2069) ||
111                        (0xFFF0 <= codePoint && codePoint <= 0xFFF8) ||
112                        (0xE0000 <= codePoint && codePoint <= 0xE0FFF) ||
113                        // Non-character
114                        (0xFDD0 <= codePoint && codePoint <= 0xFDEF) ||
115                        ((codePoint & 0xFFFE) == 0xFFFE) ||
116                        // Currency symbol
117                        (0x20A0 <= codePoint && codePoint <= 0x20CF) ||
118                        // Unpaired surrogate
119                        (0xD800 <= codePoint && codePoint <= 0xDFFF)) {
120                    return STATE_UNKNOWN;
121                } else {
122                    // Unassigned LTR character
123                    return STATE_FALSE;
124                }
125            default:
126                return STATE_UNKNOWN;
127        }
128    }
129
130    /**
131     * Computes the text direction based on an algorithm.  Subclasses implement
132     * {@link #defaultIsRtl} to handle cases where the algorithm cannot determine the
133     * direction from the text alone.
134     */
135    private static abstract class TextDirectionHeuristicImpl implements TextDirectionHeuristic {
136        private final TextDirectionAlgorithm mAlgorithm;
137
138        public TextDirectionHeuristicImpl(TextDirectionAlgorithm algorithm) {
139            mAlgorithm = algorithm;
140        }
141
142        /**
143         * Return true if the default text direction is rtl.
144         */
145        abstract protected boolean defaultIsRtl();
146
147        @Override
148        public boolean isRtl(char[] array, int start, int count) {
149            return isRtl(CharBuffer.wrap(array), start, count);
150        }
151
152        @Override
153        public boolean isRtl(CharSequence cs, int start, int count) {
154            if (cs == null || start < 0 || count < 0 || cs.length() - count < start) {
155                throw new IllegalArgumentException();
156            }
157            if (mAlgorithm == null) {
158                return defaultIsRtl();
159            }
160            return doCheck(cs, start, count);
161        }
162
163        private boolean doCheck(CharSequence cs, int start, int count) {
164            switch(mAlgorithm.checkRtl(cs, start, count)) {
165                case STATE_TRUE:
166                    return true;
167                case STATE_FALSE:
168                    return false;
169                default:
170                    return defaultIsRtl();
171            }
172        }
173    }
174
175    private static class TextDirectionHeuristicInternal extends TextDirectionHeuristicImpl {
176        private final boolean mDefaultIsRtl;
177
178        private TextDirectionHeuristicInternal(TextDirectionAlgorithm algorithm,
179                boolean defaultIsRtl) {
180            super(algorithm);
181            mDefaultIsRtl = defaultIsRtl;
182        }
183
184        @Override
185        protected boolean defaultIsRtl() {
186            return mDefaultIsRtl;
187        }
188    }
189
190    /**
191     * Interface for an algorithm to guess the direction of a paragraph of text.
192     */
193    private static interface TextDirectionAlgorithm {
194        /**
195         * Returns whether the range of text is RTL according to the algorithm.
196         */
197        int checkRtl(CharSequence cs, int start, int count);
198    }
199
200    /**
201     * Algorithm that uses the first strong directional character to determine the paragraph
202     * direction. This is the standard Unicode Bidirectional Algorithm (steps P2 and P3), with the
203     * exception that if no strong character is found, UNKNOWN is returned.
204     */
205    private static class FirstStrong implements TextDirectionAlgorithm {
206        @Override
207        public int checkRtl(CharSequence cs, int start, int count) {
208            int result = STATE_UNKNOWN;
209            int openIsolateCount = 0;
210            for (int cp, i = start, end = start + count;
211                    i < end && result == STATE_UNKNOWN;
212                    i += Character.charCount(cp)) {
213                cp = Character.codePointAt(cs, i);
214                if (0x2066 <= cp && cp <= 0x2068) { // Opening isolates
215                    openIsolateCount += 1;
216                } else if (cp == 0x2069) { // POP DIRECTIONAL ISOLATE (PDI)
217                    if (openIsolateCount > 0) openIsolateCount -= 1;
218                } else if (openIsolateCount == 0) {
219                    // Only consider the characters outside isolate pairs
220                    result = isRtlCodePoint(cp);
221                }
222            }
223            return result;
224        }
225
226        private FirstStrong() {
227        }
228
229        public static final FirstStrong INSTANCE = new FirstStrong();
230    }
231
232    /**
233     * Algorithm that uses the presence of any strong directional character of the type indicated
234     * in the constructor parameter to determine the direction of text.
235     *
236     * Characters inside isolate pairs are skipped.
237     */
238    private static class AnyStrong implements TextDirectionAlgorithm {
239        private final boolean mLookForRtl;
240
241        @Override
242        public int checkRtl(CharSequence cs, int start, int count) {
243            boolean haveUnlookedFor = false;
244            int openIsolateCount = 0;
245            for (int cp, i = start, end = start + count; i < end; i += Character.charCount(cp)) {
246                cp = Character.codePointAt(cs, i);
247                if (0x2066 <= cp && cp <= 0x2068) { // Opening isolates
248                    openIsolateCount += 1;
249                } else if (cp == 0x2069) { // POP DIRECTIONAL ISOLATE (PDI)
250                    if (openIsolateCount > 0) openIsolateCount -= 1;
251                } else if (openIsolateCount == 0) {
252                    // Only consider the characters outside isolate pairs
253                    switch (isRtlCodePoint(cp)) {
254                        case STATE_TRUE:
255                            if (mLookForRtl) {
256                                return STATE_TRUE;
257                            }
258                            haveUnlookedFor = true;
259                            break;
260                        case STATE_FALSE:
261                            if (!mLookForRtl) {
262                                return STATE_FALSE;
263                            }
264                            haveUnlookedFor = true;
265                            break;
266                        default:
267                            break;
268                    }
269                }
270            }
271            if (haveUnlookedFor) {
272                return mLookForRtl ? STATE_FALSE : STATE_TRUE;
273            }
274            return STATE_UNKNOWN;
275        }
276
277        private AnyStrong(boolean lookForRtl) {
278            this.mLookForRtl = lookForRtl;
279        }
280
281        public static final AnyStrong INSTANCE_RTL = new AnyStrong(true);
282        public static final AnyStrong INSTANCE_LTR = new AnyStrong(false);
283    }
284
285    /**
286     * Algorithm that uses the Locale direction to force the direction of a paragraph.
287     */
288    private static class TextDirectionHeuristicLocale extends TextDirectionHeuristicImpl {
289
290        public TextDirectionHeuristicLocale() {
291            super(null);
292        }
293
294        @Override
295        protected boolean defaultIsRtl() {
296            final int dir = TextUtils.getLayoutDirectionFromLocale(java.util.Locale.getDefault());
297            return (dir == View.LAYOUT_DIRECTION_RTL);
298        }
299
300        public static final TextDirectionHeuristicLocale INSTANCE =
301                new TextDirectionHeuristicLocale();
302    }
303}
304