1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package java.text;
19
20import java.awt.font.NumericShaper;
21import java.awt.font.TextAttribute;
22import java.util.ArrayList;
23import java.util.Arrays;
24
25/**
26 * Implements the <a href="http://unicode.org/reports/tr9/">Unicode Bidirectional Algorithm</a>.
27 *
28 * <p>Use a {@code Bidi} object to get the information on the position reordering of a
29 * bidirectional text, such as Arabic or Hebrew. The natural display ordering of
30 * horizontal text in these languages is from right to left, while they order
31 * numbers from left to right.
32 *
33 * <p>If the text contains multiple runs, the information of each run can be
34 * obtained from the run index. The level of any particular run indicates the
35 * direction of the text as well as the nesting level. Left-to-right runs have
36 * even levels while right-to-left runs have odd levels.
37 */
38public final class Bidi {
39    /**
40     * Constant that indicates the default base level. If there is no strong
41     * character, then set the paragraph level to 0 (left-to-right).
42     */
43    public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
44
45    /**
46     * Constant that indicates the default base level. If there is no strong
47     * character, then set the paragraph level to 1 (right-to-left).
48     */
49    public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
50
51    /**
52     * Constant that specifies the default base level as 0 (left-to-right).
53     */
54    public static final int DIRECTION_LEFT_TO_RIGHT = 0;
55
56    /**
57     * Constant that specifies the default base level as 1 (right-to-left).
58     */
59    public static final int DIRECTION_RIGHT_TO_LEFT = 1;
60
61    /**
62     * TODO: if we care about performance, we might just want to use an int[] instead of a Run[].
63     */
64    static class Run {
65        private final int start;
66        private final int limit;
67        private final int level;
68
69        public Run(int start, int limit, int level) {
70            this.start = start;
71            this.limit = limit;
72            this.level = level;
73        }
74
75        public int getLevel() {
76            return level;
77        }
78
79        public int getLimit() {
80            return limit;
81        }
82
83        public int getStart() {
84            return start;
85        }
86    }
87
88    /**
89     * Creates a {@code Bidi} object from the {@code
90     * AttributedCharacterIterator} of a paragraph text. The RUN_DIRECTION
91     * attribute determines the base direction of the bidirectional text. If it
92     * is not specified explicitly, the algorithm uses
93     * DIRECTION_DEFAULT_LEFT_TO_RIGHT by default. The BIDI_EMBEDDING attribute
94     * specifies the level of embedding for each character. Values between -1
95     * and -62 denote overrides at the level's absolute value, values from 1 to
96     * 62 indicate embeddings, and the 0 value indicates the level is calculated
97     * by the algorithm automatically. For the character with no BIDI_EMBEDDING
98     * attribute or with a improper attribute value, such as a {@code null}
99     * value, the algorithm treats its embedding level as 0. The NUMERIC_SHAPING
100     * attribute specifies the instance of NumericShaper used to convert
101     * European digits to other decimal digits before performing the bidi
102     * algorithm.
103     *
104     * @param paragraph
105     *            the String containing the paragraph text to perform the
106     *            algorithm.
107     * @throws IllegalArgumentException if {@code paragraph == null}
108     * @see java.awt.font.TextAttribute#BIDI_EMBEDDING
109     * @see java.awt.font.TextAttribute#NUMERIC_SHAPING
110     * @see java.awt.font.TextAttribute#RUN_DIRECTION
111     */
112    public Bidi(AttributedCharacterIterator paragraph) {
113        if (paragraph == null) {
114            throw new IllegalArgumentException("paragraph is null");
115        }
116
117        int begin = paragraph.getBeginIndex();
118        int end = paragraph.getEndIndex();
119        int length = end - begin;
120        char[] text = new char[length + 1]; // One more char for AttributedCharacterIterator.DONE
121
122        if (length != 0) {
123            text[0] = paragraph.first();
124        } else {
125            paragraph.first();
126        }
127
128        // First check the RUN_DIRECTION attribute.
129        int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
130        Object direction = paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
131        if (direction != null && direction instanceof Boolean) {
132            if (direction.equals(TextAttribute.RUN_DIRECTION_LTR)) {
133                flags = DIRECTION_LEFT_TO_RIGHT;
134            } else {
135                flags = DIRECTION_RIGHT_TO_LEFT;
136            }
137        }
138
139        // Retrieve the text and gather BIDI_EMBEDDINGS
140        byte[] embeddings = null;
141        for (int textLimit = 1, i = 1; i < length; textLimit = paragraph
142                .getRunLimit(TextAttribute.BIDI_EMBEDDING)
143                - begin + 1) {
144            Object embedding = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
145            if (embedding != null && embedding instanceof Integer) {
146                int embLevel = ((Integer) embedding).intValue();
147
148                if (embeddings == null) {
149                    embeddings = new byte[length];
150                }
151
152                for (; i < textLimit; i++) {
153                    text[i] = paragraph.next();
154                    embeddings[i - 1] = (byte) embLevel;
155                }
156            } else {
157                for (; i < textLimit; i++) {
158                    text[i] = paragraph.next();
159                }
160            }
161        }
162
163        // Apply NumericShaper to the text
164        Object numericShaper = paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
165        if (numericShaper != null && numericShaper instanceof NumericShaper) {
166            ((NumericShaper) numericShaper).shape(text, 0, length);
167        }
168
169        long bidi = 0;
170        try {
171            bidi = createUBiDi(text, 0, embeddings, 0, length, flags);
172            readBidiInfo(bidi);
173        } finally {
174            ubidi_close(bidi);
175        }
176    }
177
178    /**
179     * Creates a {@code Bidi} object.
180     *
181     * @param text
182     *            the char array of the paragraph text that is processed.
183     * @param textStart
184     *            the index in {@code text} of the start of the paragraph.
185     * @param embeddings
186     *            the embedding level array of the paragraph text, specifying
187     *            the embedding level information for each character. Values
188     *            between -1 and -61 denote overrides at the level's absolute
189     *            value, values from 1 to 61 indicate embeddings, and the 0
190     *            value indicates the level is calculated by the algorithm
191     *            automatically.
192     * @param embStart
193     *            the index in {@code embeddings} of the start of the paragraph.
194     * @param paragraphLength
195     *            the length of the text to perform the algorithm.
196     * @param flags
197     *            indicates the base direction of the bidirectional text. It is
198     *            expected that this will be one of the direction constant
199     *            values defined in this class. An unknown value is treated as
200     *            DIRECTION_DEFAULT_LEFT_TO_RIGHT.
201     * @throws IllegalArgumentException
202     *             if {@code textStart}, {@code embStart}, or {@code
203     *             paragraphLength} is negative; if
204     *             {@code text.length < textStart + paragraphLength} or
205     *             {@code embeddings.length < embStart + paragraphLength}.
206     * @see #DIRECTION_LEFT_TO_RIGHT
207     * @see #DIRECTION_RIGHT_TO_LEFT
208     * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
209     * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
210     */
211    public Bidi(char[] text, int textStart, byte[] embeddings, int embStart,
212            int paragraphLength, int flags) {
213
214        if (text == null || text.length - textStart < paragraphLength) {
215            throw new IllegalArgumentException();
216        }
217
218        if (embeddings != null) {
219            if (embeddings.length - embStart < paragraphLength) {
220                throw new IllegalArgumentException();
221            }
222        }
223
224        if (textStart < 0) {
225            throw new IllegalArgumentException("Negative textStart value " + textStart);
226        }
227        if (embStart < 0) {
228            throw new IllegalArgumentException("Negative embStart value " + embStart);
229        }
230        if (paragraphLength < 0) {
231            throw new IllegalArgumentException("Negative paragraph length " + paragraphLength);
232        }
233
234        long bidi = 0;
235        try {
236            bidi = createUBiDi(text, textStart, embeddings, embStart, paragraphLength, flags);
237            readBidiInfo(bidi);
238        } finally {
239            ubidi_close(bidi);
240        }
241    }
242
243    /**
244     * Creates a {@code Bidi} object.
245     *
246     * @param paragraph
247     *            the string containing the paragraph text to perform the
248     *            algorithm on.
249     * @param flags
250     *            indicates the base direction of the bidirectional text. It is
251     *            expected that this will be one of the direction constant
252     *            values defined in this class. An unknown value is treated as
253     *            DIRECTION_DEFAULT_LEFT_TO_RIGHT.
254     * @see #DIRECTION_LEFT_TO_RIGHT
255     * @see #DIRECTION_RIGHT_TO_LEFT
256     * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
257     * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
258     */
259    public Bidi(String paragraph, int flags) {
260        this((paragraph == null ? null : paragraph.toCharArray()), 0, null, 0,
261                (paragraph == null ? 0 : paragraph.length()), flags);
262    }
263
264    // create the native UBiDi struct, need to be closed with ubidi_close().
265    private static long createUBiDi(char[] text, int textStart,
266            byte[] embeddings, int embStart, int paragraphLength, int flags) {
267        char[] realText = null;
268
269        byte[] realEmbeddings = null;
270
271        if (text == null || text.length - textStart < paragraphLength) {
272            throw new IllegalArgumentException();
273        }
274        realText = new char[paragraphLength];
275        System.arraycopy(text, textStart, realText, 0, paragraphLength);
276
277        if (embeddings != null) {
278            if (embeddings.length - embStart < paragraphLength) {
279                throw new IllegalArgumentException();
280            }
281            if (paragraphLength > 0) {
282                Bidi temp = new Bidi(text, textStart, null, 0, paragraphLength, flags);
283                realEmbeddings = new byte[paragraphLength];
284                System.arraycopy(temp.offsetLevel, 0, realEmbeddings, 0, paragraphLength);
285                for (int i = 0; i < paragraphLength; i++) {
286                    byte e = embeddings[i];
287                    if (e < 0) {
288                        realEmbeddings[i] = (byte) (UBIDI_LEVEL_OVERRIDE - e);
289                    } else if (e > 0) {
290                        realEmbeddings[i] = e;
291                    } else {
292                        realEmbeddings[i] |= (byte) UBIDI_LEVEL_OVERRIDE;
293                    }
294                }
295            }
296        }
297
298        if (flags > 1 || flags < -2) {
299            flags = 0;
300        }
301
302        long bidi = 0;
303        boolean needsDeletion = true;
304        try {
305            bidi = ubidi_open();
306            ubidi_setPara(bidi, realText, paragraphLength, flags, realEmbeddings);
307            needsDeletion = false;
308        } finally {
309            if (needsDeletion) {
310                ubidi_close(bidi);
311            }
312        }
313        return bidi;
314    }
315
316    /* private constructor used by createLineBidi() */
317    private Bidi(long pBidi) {
318        readBidiInfo(pBidi);
319    }
320
321    // read info from the native UBiDi struct
322    private void readBidiInfo(long pBidi) {
323        length = ubidi_getLength(pBidi);
324
325        offsetLevel = (length == 0) ? null : ubidi_getLevels(pBidi);
326
327        baseLevel = ubidi_getParaLevel(pBidi);
328
329        int runCount = ubidi_countRuns(pBidi);
330        if (runCount == 0) {
331            unidirectional = true;
332            runs = null;
333        } else if (runCount < 0) {
334            runs = null;
335        } else {
336            runs = ubidi_getRuns(pBidi);
337
338            // Simplified case for one run which has the base level
339            if (runCount == 1 && runs[0].getLevel() == baseLevel) {
340                unidirectional = true;
341                runs = null;
342            }
343        }
344
345        direction = ubidi_getDirection(pBidi);
346    }
347
348    private int baseLevel;
349
350    private int length;
351
352    private byte[] offsetLevel;
353
354    private Run[] runs;
355
356    private int direction;
357
358    private boolean unidirectional;
359
360    /**
361     * Returns whether the base level is from left to right.
362     *
363     * @return true if the base level is from left to right.
364     */
365    public boolean baseIsLeftToRight() {
366        return baseLevel % 2 == 0 ? true : false;
367    }
368
369    /**
370     * Creates a new {@code Bidi} object containing the information of one line
371     * from this object.
372     *
373     * @param lineStart
374     *            the start offset of the line.
375     * @param lineLimit
376     *            the limit of the line.
377     * @return the new line Bidi object. In this new object, the indices will
378     *         range from 0 to (limit - start - 1).
379     * @throws IllegalArgumentException
380     *             if {@code lineStart < 0}, {@code lineLimit < 0}, {@code
381     *             lineStart > lineLimit} or if {@code lineStart} is greater
382     *             than the length of this object's paragraph text.
383     */
384    public Bidi createLineBidi(int lineStart, int lineLimit) {
385        if (lineStart < 0 || lineLimit < 0 || lineLimit > length || lineStart > lineLimit) {
386            throw new IllegalArgumentException("Invalid ranges (start=" + lineStart + ", " +
387                    "limit=" + lineLimit + ", length=" + length + ")");
388        }
389
390        char[] text = new char[this.length];
391        Arrays.fill(text, 'a');
392        byte[] embeddings = new byte[this.length];
393        for (int i = 0; i < embeddings.length; i++) {
394            embeddings[i] = (byte) -this.offsetLevel[i];
395        }
396
397        int dir = this.baseIsLeftToRight()
398                ? Bidi.DIRECTION_LEFT_TO_RIGHT
399                : Bidi.DIRECTION_RIGHT_TO_LEFT;
400        long parent = 0;
401        try {
402            parent = createUBiDi(text, 0, embeddings, 0, this.length, dir);
403            if (lineStart == lineLimit) {
404                return createEmptyLineBidi(parent);
405            }
406            return new Bidi(ubidi_setLine(parent, lineStart, lineLimit));
407        } finally {
408            ubidi_close(parent);
409        }
410    }
411
412    private Bidi createEmptyLineBidi(long parent) {
413        // ICU4C doesn't allow this case, but the RI does.
414        Bidi result = new Bidi(parent);
415        result.length = 0;
416        result.offsetLevel = null;
417        result.runs = null;
418        result.unidirectional = true;
419        return result;
420    }
421
422    /**
423     * Returns the base level.
424     */
425    public int getBaseLevel() {
426        return baseLevel;
427    }
428
429    /**
430     * Returns the length of the text.
431     */
432    public int getLength() {
433        return length;
434    }
435
436    /**
437     * Returns the level of the given character.
438     */
439    public int getLevelAt(int offset) {
440        try {
441            return offsetLevel[offset] & ~UBIDI_LEVEL_OVERRIDE;
442        } catch (RuntimeException e) {
443            return baseLevel;
444        }
445    }
446
447    /**
448     * Returns the number of runs in the text, at least 1.
449     */
450    public int getRunCount() {
451        return unidirectional ? 1 : runs.length;
452    }
453
454    /**
455     * Returns the level of the given run.
456     */
457    public int getRunLevel(int run) {
458        return unidirectional ? baseLevel : runs[run].getLevel();
459    }
460
461    /**
462     * Returns the limit offset of the given run.
463     */
464    public int getRunLimit(int run) {
465        return unidirectional ? length : runs[run].getLimit();
466    }
467
468    /**
469     * Returns the start offset of the given run.
470     */
471    public int getRunStart(int run) {
472        return unidirectional ? 0 : runs[run].getStart();
473    }
474
475    /**
476     * Returns true if the text is from left to right, that is, both the base
477     * direction and the text direction is from left to right.
478     */
479    public boolean isLeftToRight() {
480        return direction == UBiDiDirection_UBIDI_LTR;
481    }
482
483    /**
484     * Returns true if the text direction is mixed.
485     */
486    public boolean isMixed() {
487        return direction == UBiDiDirection_UBIDI_MIXED;
488    }
489
490    /**
491     * Returns true if the text is from right to left, that is, both the base
492     * direction and the text direction is from right to left.
493     */
494    public boolean isRightToLeft() {
495        return direction == UBiDiDirection_UBIDI_RTL;
496    }
497
498    /**
499     * Reorders a range of objects according to their specified levels. This is
500     * a convenience function that does not use a {@code Bidi} object. The range
501     * of objects at {@code index} from {@code objectStart} to {@code
502     * objectStart + count} will be reordered according to the range of levels
503     * at {@code index} from {@code levelStart} to {@code levelStart + count}.
504     *
505     * @param levels
506     *            the level array, which is already determined.
507     * @param levelStart
508     *            the start offset of the range of the levels.
509     * @param objects
510     *            the object array to reorder.
511     * @param objectStart
512     *            the start offset of the range of objects.
513     * @param count
514     *            the count of the range of objects to reorder.
515     * @throws IllegalArgumentException
516     *             if {@code count}, {@code levelStart} or {@code objectStart}
517     *             is negative; if {@code count > levels.length - levelStart} or
518     *             if {@code count > objects.length - objectStart}.
519     */
520    public static void reorderVisually(byte[] levels, int levelStart,
521            Object[] objects, int objectStart, int count) {
522        if (count < 0 || levelStart < 0 || objectStart < 0
523                || count > levels.length - levelStart
524                || count > objects.length - objectStart) {
525            throw new IllegalArgumentException("Invalid ranges (levels=" + levels.length +
526                    ", levelStart=" + levelStart + ", objects=" + objects.length +
527                    ", objectStart=" + objectStart + ", count=" + count + ")");
528        }
529
530        byte[] realLevels = new byte[count];
531        System.arraycopy(levels, levelStart, realLevels, 0, count);
532
533        int[] indices = ubidi_reorderVisual(realLevels, count);
534
535        ArrayList<Object> result = new ArrayList<Object>(count);
536        for (int i = 0; i < count; i++) {
537            result.add(objects[objectStart + indices[i]]);
538        }
539
540        System.arraycopy(result.toArray(), 0, objects, objectStart, count);
541    }
542
543    /**
544     * Indicates whether a range of characters of a text requires a {@code Bidi}
545     * object to display properly.
546     *
547     * @param text
548     *            the char array of the text.
549     * @param start
550     *            the start offset of the range of characters.
551     * @param limit
552     *            the limit offset of the range of characters.
553     * @return {@code true} if the range of characters requires a {@code Bidi}
554     *         object; {@code false} otherwise.
555     * @throws IllegalArgumentException
556     *             if {@code start} or {@code limit} is negative; {@code start >
557     *             limit} or {@code limit} is greater than the length of this
558     *             object's paragraph text.
559     */
560    public static boolean requiresBidi(char[] text, int start, int limit) {
561        if (limit < 0 || start < 0 || start > limit || limit > text.length) {
562            throw new IllegalArgumentException();
563        }
564
565        Bidi bidi = new Bidi(text, start, null, 0, limit - start, 0);
566        return !bidi.isLeftToRight();
567    }
568
569    @Override
570    public String toString() {
571        return getClass().getName()
572                + "[direction: " + direction + " baseLevel: " + baseLevel
573                + " length: " + length + " runs: " + Arrays.toString(runs) + "]";
574    }
575
576    // ICU4C constants.
577    private static final int UBIDI_LEVEL_OVERRIDE = 0x80;
578    private static final int UBiDiDirection_UBIDI_LTR = 0;
579    private static final int UBiDiDirection_UBIDI_RTL = 1;
580    private static final int UBiDiDirection_UBIDI_MIXED = 2;
581
582    // ICU4C functions.
583    private static native long ubidi_open();
584    private static native void ubidi_close(long pBiDi);
585    private static native void ubidi_setPara(long pBiDi, char[] text, int length, int paraLevel, byte[] embeddingLevels);
586    private static native long ubidi_setLine(final long pParaBiDi, int start, int limit);
587    private static native int ubidi_getDirection(final long pBiDi);
588    private static native int ubidi_getLength(final long pBiDi);
589    private static native byte ubidi_getParaLevel(final long pBiDi);
590    private static native byte[] ubidi_getLevels(long pBiDi);
591    private static native int ubidi_countRuns(long pBiDi);
592    private static native Bidi.Run[] ubidi_getRuns(long pBidi);
593    private static native int[] ubidi_reorderVisual(byte[] levels, int length);
594}
595