1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4*******************************************************************************
5*   Copyright (C) 2001-2016, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7*******************************************************************************
8*/
9
10/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
11 * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
12 * concept of RUNS_ONLY which is a double operation.
13 * It could be advantageous to divide this into 3 concepts:
14 * a) Operation: direct / inverse / RUNS_ONLY
15 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
16 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
17 * This would allow combinations not possible today like RUNS_ONLY with
18 * NUMBERS_SPECIAL.
19 * It would also allow setting INSERT_MARKS for the direct step of RUNS_ONLY
20 * and REMOVE_CONTROLS for the inverse step.
21 * Not all combinations would be supported, and probably not all of them make
22 * sense. The documentation would need to state which ones are supported and
23 * what the fallbacks are for unsupported combinations.
24 */
25
26//TODO: make sample program do something simple but real and complete
27
28package com.ibm.icu.text;
29
30import java.awt.font.NumericShaper;
31import java.awt.font.TextAttribute;
32import java.lang.reflect.Array;
33import java.text.AttributedCharacterIterator;
34import java.util.Arrays;
35
36import com.ibm.icu.impl.UBiDiProps;
37import com.ibm.icu.lang.UCharacter;
38import com.ibm.icu.lang.UCharacterDirection;
39import com.ibm.icu.lang.UProperty;
40
41/**
42 *
43 * <h2>Bidi algorithm for ICU</h2>
44 *
45 * This is an implementation of the Unicode Bidirectional Algorithm. The
46 * algorithm is defined in the <a
47 * href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.
48 * <p>
49 *
50 * Note: Libraries that perform a bidirectional algorithm and reorder strings
51 * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
52 * shaping (ArabicShaping) classes can be used at the core of such "Storage
53 * Layout Engines".
54 *
55 * <h3>General remarks about the API:</h3>
56 *
57 * The &quot;limit&quot; of a sequence of characters is the position just after
58 * their last character, i.e., one more than that position.
59 * <p>
60 *
61 * Some of the API methods provide access to &quot;runs&quot;. Such a
62 * &quot;run&quot; is defined as a sequence of characters that are at the same
63 * embedding level after performing the Bidi algorithm.
64 *
65 * <h3>Basic concept: paragraph</h3>
66 * A piece of text can be divided into several paragraphs by characters
67 * with the Bidi class <code>Block Separator</code>. For handling of
68 * paragraphs, see:
69 * <ul>
70 * <li>{@link #countParagraphs}
71 * <li>{@link #getParaLevel}
72 * <li>{@link #getParagraph}
73 * <li>{@link #getParagraphByIndex}
74 * </ul>
75 *
76 * <h3>Basic concept: text direction</h3>
77 * The direction of a piece of text may be:
78 * <ul>
79 * <li>{@link #LTR}
80 * <li>{@link #RTL}
81 * <li>{@link #MIXED}
82 * <li>{@link #NEUTRAL}
83 * </ul>
84 *
85 * <h3>Basic concept: levels</h3>
86 *
87 * Levels in this API represent embedding levels according to the Unicode
88 * Bidirectional Algorithm.
89 * Their low-order bit (even/odd value) indicates the visual direction.<p>
90 *
91 * Levels can be abstract values when used for the
92 * <code>paraLevel</code> and <code>embeddingLevels</code>
93 * arguments of <code>setPara()</code>; there:
94 * <ul>
95 * <li>the high-order bit of an <code>embeddingLevels[]</code>
96 * value indicates whether the using application is
97 * specifying the level of a character to <i>override</i> whatever the
98 * Bidi implementation would resolve it to.</li>
99 * <li><code>paraLevel</code> can be set to the
100 * pseudo-level values <code>LEVEL_DEFAULT_LTR</code>
101 * and <code>LEVEL_DEFAULT_RTL</code>.</li>
102 * </ul>
103 *
104 * <p>The related constants are not real, valid level values.
105 * <code>LEVEL_DEFAULT_XXX</code> can be used to specify
106 * the default paragraph level to fall back on
107 * when the <code>setPara()</code> method
108 * is asked to determine the level but there is no
109 * strongly typed character in the input.<p>
110 *
111 * Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even
112 * and the one for <code>LEVEL_DEFAULT_RTL</code> is odd,
113 * just like with normal LTR and RTL level values -
114 * these special values are designed that way. Also, the implementation
115 * assumes that MAX_EXPLICIT_LEVEL is odd.
116 *
117 * <b>See Also:</b>
118 * <ul>
119 * <li>{@link #LEVEL_DEFAULT_LTR}
120 * <li>{@link #LEVEL_DEFAULT_RTL}
121 * <li>{@link #LEVEL_OVERRIDE}
122 * <li>{@link #MAX_EXPLICIT_LEVEL}
123 * <li>{@link #setPara}
124 * </ul>
125 *
126 * <h3>Basic concept: Reordering Mode</h3>
127 * Reordering mode values indicate which variant of the Bidi algorithm to
128 * use.
129 *
130 * <b>See Also:</b>
131 * <ul>
132 * <li>{@link #setReorderingMode}
133 * <li>{@link #REORDER_DEFAULT}
134 * <li>{@link #REORDER_NUMBERS_SPECIAL}
135 * <li>{@link #REORDER_GROUP_NUMBERS_WITH_R}
136 * <li>{@link #REORDER_RUNS_ONLY}
137 * <li>{@link #REORDER_INVERSE_NUMBERS_AS_L}
138 * <li>{@link #REORDER_INVERSE_LIKE_DIRECT}
139 * <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
140 * </ul>
141 *
142 * <h3>Basic concept: Reordering Options</h3>
143 * Reordering options can be applied during Bidi text transformations.
144 *
145 * <b>See Also:</b>
146 * <ul>
147 * <li>{@link #setReorderingOptions}
148 * <li>{@link #OPTION_DEFAULT}
149 * <li>{@link #OPTION_INSERT_MARKS}
150 * <li>{@link #OPTION_REMOVE_CONTROLS}
151 * <li>{@link #OPTION_STREAMING}
152 * </ul>
153 *
154 * <h4> Sample code for the ICU Bidi API </h4>
155 *
156 * <h5>Rendering a paragraph with the ICU Bidi API</h5>
157 *
158 * This is (hypothetical) sample code that illustrates how the ICU Bidi API
159 * could be used to render a paragraph of text. Rendering code depends highly on
160 * the graphics system, therefore this sample code must make a lot of
161 * assumptions, which may or may not match any existing graphics system's
162 * properties.
163 *
164 * <p>
165 * The basic assumptions are:
166 *
167 * <ul>
168 * <li>Rendering is done from left to right on a horizontal line.</li>
169 * <li>A run of single-style, unidirectional text can be rendered at once.
170 * </li>
171 * <li>Such a run of text is passed to the graphics system with characters
172 * (code units) in logical order.</li>
173 * <li>The line-breaking algorithm is very complicated and Locale-dependent -
174 * and therefore its implementation is omitted from this sample code.</li>
175 * </ul>
176 *
177 * <pre>
178 *
179 *  package com.ibm.icu.dev.test.bidi;
180 *
181 *  import com.ibm.icu.text.Bidi;
182 *  import com.ibm.icu.text.BidiRun;
183 *
184 *  public class Sample {
185 *
186 *      static final int styleNormal = 0;
187 *      static final int styleSelected = 1;
188 *      static final int styleBold = 2;
189 *      static final int styleItalics = 4;
190 *      static final int styleSuper=8;
191 *      static final int styleSub = 16;
192 *
193 *      static class StyleRun {
194 *          int limit;
195 *          int style;
196 *
197 *          public StyleRun(int limit, int style) {
198 *              this.limit = limit;
199 *              this.style = style;
200 *          }
201 *      }
202 *
203 *      static class Bounds {
204 *          int start;
205 *          int limit;
206 *
207 *          public Bounds(int start, int limit) {
208 *              this.start = start;
209 *              this.limit = limit;
210 *          }
211 *      }
212 *
213 *      static int getTextWidth(String text, int start, int limit,
214 *                              StyleRun[] styleRuns, int styleRunCount) {
215 *          // simplistic way to compute the width
216 *          return limit - start;
217 *      }
218 *
219 *      // set limit and StyleRun limit for a line
220 *      // from text[start] and from styleRuns[styleRunStart]
221 *      // using Bidi.getLogicalRun(...)
222 *      // returns line width
223 *      static int getLineBreak(String text, Bounds line, Bidi para,
224 *                              StyleRun styleRuns[], Bounds styleRun) {
225 *          // dummy return
226 *          return 0;
227 *      }
228 *
229 *      // render runs on a line sequentially, always from left to right
230 *
231 *      // prepare rendering a new line
232 *      static void startLine(byte textDirection, int lineWidth) {
233 *          System.out.println();
234 *      }
235 *
236 *      // render a run of text and advance to the right by the run width
237 *      // the text[start..limit-1] is always in logical order
238 *      static void renderRun(String text, int start, int limit,
239 *                            byte textDirection, int style) {
240 *      }
241 *
242 *      // We could compute a cross-product
243 *      // from the style runs with the directional runs
244 *      // and then reorder it.
245 *      // Instead, here we iterate over each run type
246 *      // and render the intersections -
247 *      // with shortcuts in simple (and common) cases.
248 *      // renderParagraph() is the main function.
249 *
250 *      // render a directional run with
251 *      // (possibly) multiple style runs intersecting with it
252 *      static void renderDirectionalRun(String text, int start, int limit,
253 *                                       byte direction, StyleRun styleRuns[],
254 *                                       int styleRunCount) {
255 *          int i;
256 *
257 *          // iterate over style runs
258 *          if (direction == Bidi.LTR) {
259 *              int styleLimit;
260 *              for (i = 0; i &lt; styleRunCount; ++i) {
261 *                  styleLimit = styleRuns[i].limit;
262 *                  if (start &lt; styleLimit) {
263 *                      if (styleLimit &gt; limit) {
264 *                          styleLimit = limit;
265 *                      }
266 *                      renderRun(text, start, styleLimit,
267 *                                direction, styleRuns[i].style);
268 *                      if (styleLimit == limit) {
269 *                          break;
270 *                      }
271 *                      start = styleLimit;
272 *                  }
273 *              }
274 *          } else {
275 *              int styleStart;
276 *
277 *              for (i = styleRunCount-1; i &gt;= 0; --i) {
278 *                  if (i &gt; 0) {
279 *                      styleStart = styleRuns[i-1].limit;
280 *                  } else {
281 *                      styleStart = 0;
282 *                  }
283 *                  if (limit &gt;= styleStart) {
284 *                      if (styleStart &lt; start) {
285 *                          styleStart = start;
286 *                      }
287 *                      renderRun(text, styleStart, limit, direction,
288 *                                styleRuns[i].style);
289 *                      if (styleStart == start) {
290 *                          break;
291 *                      }
292 *                      limit = styleStart;
293 *                  }
294 *              }
295 *          }
296 *      }
297 *
298 *      // the line object represents text[start..limit-1]
299 *      static void renderLine(Bidi line, String text, int start, int limit,
300 *                             StyleRun styleRuns[], int styleRunCount) {
301 *          byte direction = line.getDirection();
302 *          if (direction != Bidi.MIXED) {
303 *              // unidirectional
304 *              if (styleRunCount &lt;= 1) {
305 *                  renderRun(text, start, limit, direction, styleRuns[0].style);
306 *              } else {
307 *                  renderDirectionalRun(text, start, limit, direction,
308 *                                       styleRuns, styleRunCount);
309 *              }
310 *          } else {
311 *              // mixed-directional
312 *              int count, i;
313 *              BidiRun run;
314 *
315 *              try {
316 *                  count = line.countRuns();
317 *              } catch (IllegalStateException e) {
318 *                  e.printStackTrace();
319 *                  return;
320 *              }
321 *              if (styleRunCount &lt;= 1) {
322 *                  int style = styleRuns[0].style;
323 *
324 *                  // iterate over directional runs
325 *                  for (i = 0; i &lt; count; ++i) {
326 *                      run = line.getVisualRun(i);
327 *                      renderRun(text, run.getStart(), run.getLimit(),
328 *                                run.getDirection(), style);
329 *                  }
330 *              } else {
331 *                  // iterate over both directional and style runs
332 *                  for (i = 0; i &lt; count; ++i) {
333 *                      run = line.getVisualRun(i);
334 *                      renderDirectionalRun(text, run.getStart(),
335 *                                           run.getLimit(), run.getDirection(),
336 *                                           styleRuns, styleRunCount);
337 *                  }
338 *              }
339 *          }
340 *      }
341 *
342 *      static void renderParagraph(String text, byte textDirection,
343 *                                  StyleRun styleRuns[], int styleRunCount,
344 *                                  int lineWidth) {
345 *          int length = text.length();
346 *          Bidi para = new Bidi();
347 *          try {
348 *              para.setPara(text,
349 *                           textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
350 *                                              : Bidi.LEVEL_DEFAULT_LTR,
351 *                           null);
352 *          } catch (Exception e) {
353 *              e.printStackTrace();
354 *              return;
355 *          }
356 *          byte paraLevel = (byte)(1 &amp; para.getParaLevel());
357 *          StyleRun styleRun = new StyleRun(length, styleNormal);
358 *
359 *          if (styleRuns == null || styleRunCount &lt;= 0) {
360 *              styleRuns = new StyleRun[1];
361 *              styleRunCount = 1;
362 *              styleRuns[0] = styleRun;
363 *          }
364 *          // assume styleRuns[styleRunCount-1].limit&gt;=length
365 *
366 *          int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
367 *          if (width &lt;= lineWidth) {
368 *              // everything fits onto one line
369 *
370 *              // prepare rendering a new line from either left or right
371 *              startLine(paraLevel, width);
372 *
373 *              renderLine(para, text, 0, length, styleRuns, styleRunCount);
374 *          } else {
375 *              // we need to render several lines
376 *              Bidi line = new Bidi(length, 0);
377 *              int start = 0, limit;
378 *              int styleRunStart = 0, styleRunLimit;
379 *
380 *              for (;;) {
381 *                  limit = length;
382 *                  styleRunLimit = styleRunCount;
383 *                  width = getLineBreak(text, new Bounds(start, limit),
384 *                                       para, styleRuns,
385 *                                       new Bounds(styleRunStart, styleRunLimit));
386 *                  try {
387 *                      line = para.setLine(start, limit);
388 *                  } catch (Exception e) {
389 *                      e.printStackTrace();
390 *                      return;
391 *                  }
392 *                  // prepare rendering a new line
393 *                  // from either left or right
394 *                  startLine(paraLevel, width);
395 *
396 *                  if (styleRunStart &gt; 0) {
397 *                      int newRunCount = styleRuns.length - styleRunStart;
398 *                      StyleRun[] newRuns = new StyleRun[newRunCount];
399 *                      System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
400 *                                       newRunCount);
401 *                      renderLine(line, text, start, limit, newRuns,
402 *                                 styleRunLimit - styleRunStart);
403 *                  } else {
404 *                      renderLine(line, text, start, limit, styleRuns,
405 *                                 styleRunLimit - styleRunStart);
406 *                  }
407 *                  if (limit == length) {
408 *                      break;
409 *                  }
410 *                  start = limit;
411 *                  styleRunStart = styleRunLimit - 1;
412 *                  if (start &gt;= styleRuns[styleRunStart].limit) {
413 *                      ++styleRunStart;
414 *                  }
415 *              }
416 *          }
417 *      }
418 *
419 *      public static void main(String[] args)
420 *      {
421 *          renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
422 *          renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
423 *      }
424 *  }
425 *
426 * </pre>
427 *
428 * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)
429 * @stable ICU 3.8
430 */
431
432/*
433 * General implementation notes:
434 *
435 * Throughout the implementation, there are comments like (W2) that refer to
436 * rules of the BiDi algorithm, in this example to the second rule of the
437 * resolution of weak types.
438 *
439 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
440 * character according to UTF-16, the second UChar gets the directional property of
441 * the entire character assigned, while the first one gets a BN, a boundary
442 * neutral, type, which is ignored by most of the algorithm according to
443 * rule (X9) and the implementation suggestions of the BiDi algorithm.
444 *
445 * Later, adjustWSLevels() will set the level for each BN to that of the
446 * following character (UChar), which results in surrogate pairs getting the
447 * same level on each of their surrogates.
448 *
449 * In a UTF-8 implementation, the same thing could be done: the last byte of
450 * a multi-byte sequence would get the "real" property, while all previous
451 * bytes of that sequence would get BN.
452 *
453 * It is not possible to assign all those parts of a character the same real
454 * property because this would fail in the resolution of weak types with rules
455 * that look at immediately surrounding types.
456 *
457 * As a related topic, this implementation does not remove Boundary Neutral
458 * types from the input, but ignores them wherever this is relevant.
459 * For example, the loop for the resolution of the weak types reads
460 * types until it finds a non-BN.
461 * Also, explicit embedding codes are neither changed into BN nor removed.
462 * They are only treated the same way real BNs are.
463 * As stated before, adjustWSLevels() takes care of them at the end.
464 * For the purpose of conformance, the levels of all these codes
465 * do not matter.
466 *
467 * Note that this implementation modifies the dirProps
468 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
469 * X6, N0 (replace paired brackets by L or R).
470 *
471 * In this implementation, the resolution of weak types (W1 to W6),
472 * neutrals (N1 and N2), and the assignment of the resolved level (In)
473 * are all done in one single loop, in resolveImplicitLevels().
474 * Changes of dirProp values are done on the fly, without writing
475 * them back to the dirProps array.
476 *
477 *
478 * This implementation contains code that allows bypassing steps of the
479 * algorithm that are not needed for the specific paragraph,
480 * in order to speed up the most common cases considerably,
481 * like text that is entirely LTR, or RTL text without numbers.
482 *
483 * Most of this is done by setting a bit for each directional property
484 * in a flags variable and later checking for whether there are
485 * any LTR characters or any RTL characters, or both, whether
486 * there are any explicit embedding codes, etc.
487 *
488 * If the (Xn) steps are performed, then the flags are re-evaluated,
489 * because they will then not contain the embedding codes any more
490 * and will be adjusted for override codes, so that subsequently
491 * more bypassing may be possible than what the initial flags suggested.
492 *
493 * If the text is not mixed-directional, then the
494 * algorithm steps for the weak type resolution are not performed,
495 * and all levels are set to the paragraph level.
496 *
497 * If there are no explicit embedding codes, then the (Xn) steps
498 * are not performed.
499 *
500 * If embedding levels are supplied as a parameter, then all
501 * explicit embedding codes are ignored, and the (Xn) steps
502 * are not performed.
503 *
504 * White Space types could get the level of the run they belong to,
505 * and are checked with a test of (flags&MASK_EMBEDDING) to
506 * consider if the paragraph direction should be considered in
507 * the flags variable.
508 *
509 * If there are no White Space types in the paragraph, then
510 * (L1) is not necessary in adjustWSLevels().
511 */
512
513public class Bidi {
514
515    static class Point {
        /* Plain two-field record: a text position paired with a flag.
         * InsertPoints.points holds an array of these.
         * NOTE(review): presumably describes a single LRM/RLM mark to be
         * inserted (cf. OPTION_INSERT_MARKS) - confirm against the code that
         * fills InsertPoints. */
516        int pos;    /* position in text */
517        int flag;   /* flag for LRM/RLM, before/after; the flag values are not defined in this class */
518    }
519
520    static class InsertPoints {
        /* Holder for an array of Point entries plus two counters.
         * NOTE(review): the counters carry no documentation in this class;
         * the meanings noted below are guesses from the names - confirm
         * against the code that uses them. */
521        int size;       /* presumably the number of entries of points[] in use - TODO confirm */
522        int confirmed;  /* presumably how many of those entries are final - TODO confirm */
523        Point[] points = new Point[0];  /* initialized to a zero-length array rather than null */
524    }
525
526    static class Opening {
        /* Bookkeeping record for one opening bracket; BracketData.openings
         * holds an array of these.
         * NOTE(review): presumably used while resolving paired brackets
         * (rule N0) - confirm against the bracket-handling code. */
527        int   position;                 /* position of opening bracket */
528        int   match;                    /* matching char, or minus the position of the closing bracket */
529        int   contextPos;               /* position of last strong char found before opening */
530        short flags;                    /* bits for L or R/AL found within the pair */
531        byte  contextDir;               /* L or R according to last strong char before opening */
532    }
533
534    static class IsoRun {
        /* Per-run record; BracketData.isoRuns holds an array of these
         * ("nested isolated sequence entries").
         * NOTE(review): start and limit presumably index into
         * BracketData.openings - confirm against the code that sets them. */
535        int   contextPos;               /* position of char determining context */
536        short start;                    /* index of first opening entry for this run */
537        short limit;                    /* index after last opening entry for this run */
538        byte  level;                    /* level of this run */
539        byte  lastStrong;               /* bidi class of last strong char found in this run */
540        byte  lastBase;                 /* bidi class of last base char found in this run */
541        byte  contextDir;               /* L or R to use as context for following openings */
542    }
543
544    static class BracketData {
        /* Groups the Opening entries and the IsoRun entries together with a
         * mode flag. Both arrays are only allocated here; their elements
         * start out null. */
545        Opening[] openings = new Opening[SIMPLE_OPENINGS_COUNT];  /* allocated with SIMPLE_OPENINGS_COUNT slots */
546        int   isoRunLast;               /* index of last used entry (presumably in isoRuns - TODO confirm) */
547        /* array of nested isolated sequence entries; can never exceed MAX_EXPLICIT_LEVEL
548           + 1 for index 0, + 1 for before the first isolated sequence */
549        IsoRun[]  isoRuns = new IsoRun[MAX_EXPLICIT_LEVEL+2];
550        boolean   isNumbersSpecial;     /* reordering mode for NUMBERS_SPECIAL (presumably true when such a mode is active - TODO confirm) */
551    }
552
553    static class Isolate {
        /* Four-field state record with no behavior of its own.
         * NOTE(review): the fields are undocumented here; presumably this is
         * resolver state saved for one isolate (two positions and two state
         * values) - confirm against the code that reads and writes it. */
554        int   startON;
555        int   start1;
556        short stateImp;
557        short state;
558    }
559
560    /** Paragraph level setting<p>
561     *
562     * Constant indicating that the base direction depends on the first strong
563     * directional character in the text according to the Unicode Bidirectional
564     * Algorithm. If no strong directional character is present,
565     * then set the paragraph level to 0 (left-to-right).<p>
566     *
     * This is a pseudo-level rather than a real, valid level value; it is
     * even, just like normal left-to-right level values.<p>
     *
567     * If this value is used in conjunction with reordering modes
568     * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
569     * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
570     * is assumed to be visual LTR, and the text after reordering is required
571     * to be the corresponding logical string with appropriate contextual
572     * direction. The direction of the result string will be RTL if either
573     * the rightmost or leftmost strong character of the source text is RTL
574     * or Arabic Letter; the direction will be LTR otherwise.<p>
575     *
576     * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
577     * be added at the beginning of the result string to ensure round trip
578     * (that the result string, when reordered back to visual, will produce
579     * the original source text).
580     * @see #REORDER_INVERSE_LIKE_DIRECT
581     * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
582     * @stable ICU 3.8
583     */
584    public static final byte LEVEL_DEFAULT_LTR = (byte)0x7e;
585
586    /** Paragraph level setting<p>
587     *
588     * Constant indicating that the base direction depends on the first strong
589     * directional character in the text according to the Unicode Bidirectional
590     * Algorithm. If no strong directional character is present,
591     * then set the paragraph level to 1 (right-to-left).<p>
592     *
     * This is a pseudo-level rather than a real, valid level value; it is
     * odd, just like normal right-to-left level values.<p>
     *
593     * If this value is used in conjunction with reordering modes
594     * <code>REORDER_INVERSE_LIKE_DIRECT</code> or
595     * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
596     * is assumed to be visual LTR, and the text after reordering is required
597     * to be the corresponding logical string with appropriate contextual
598     * direction. The direction of the result string will be RTL if either
599     * the rightmost or leftmost strong character of the source text is RTL
600     * or Arabic Letter, or if the text contains no strong character;
601     * the direction will be LTR otherwise.<p>
602     *
603     * If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
604     * be added at the beginning of the result string to ensure round trip
605     * (that the result string, when reordered back to visual, will produce
606     * the original source text).
607     * @see #REORDER_INVERSE_LIKE_DIRECT
608     * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
609     * @stable ICU 3.8
610     */
611    public static final byte LEVEL_DEFAULT_RTL = (byte)0x7f;
612
613    /**
614     * Maximum explicit embedding level.
615     * (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
     * The implementation assumes that this value is odd.
616     * @stable ICU 3.8
617     */
618    public static final byte MAX_EXPLICIT_LEVEL = 125;
619
620    /**
621     * Bit flag for level input.
622     * Overrides directional properties.
     * <p>When this high-order bit is set in an <code>embeddingLevels[]</code>
     * value passed to <code>setPara()</code>, the using application is
     * specifying the level of that character to override whatever the Bidi
     * implementation would resolve it to.
     * <p>Note that <code>(byte)0x80</code> is the sign bit of a Java
     * <code>byte</code>, so the constant itself is negative (-128).
623     * @stable ICU 3.8
624     */
625    public static final byte LEVEL_OVERRIDE = (byte)0x80;
626
627    /**
628     * Special value which can be returned by the mapping methods when a
629     * logical index has no corresponding visual index or vice-versa. This may
630     * happen for the logical-to-visual mapping of a Bidi control when option
631     * <code>OPTION_REMOVE_CONTROLS</code> is
632     * specified. This can also happen for the visual-to-logical mapping of a
633     * Bidi mark (LRM or RLM) inserted by option
634     * <code>OPTION_INSERT_MARKS</code>.
     * <p>The value is negative, so it cannot be mistaken for a real index.
635     * @see #getVisualIndex
636     * @see #getVisualMap
637     * @see #getLogicalIndex
638     * @see #getLogicalMap
639     * @see #OPTION_INSERT_MARKS
640     * @see #OPTION_REMOVE_CONTROLS
641     * @stable ICU 3.8
642     */
643    public static final int MAP_NOWHERE = -1;
644
645    /**
646     * Left-to-right text.
647     * <ul>
648     * <li>As return value for <code>getDirection()</code>, it means
649     *     that the source string contains no right-to-left characters, or
650     *     that the source string is empty and the paragraph level is even.
651     * <li>As return value for <code>getBaseDirection()</code>, it
652     *     means that the first strong character of the source string has
653     *     a left-to-right direction.
654     * </ul>
     * @see #getDirection
     * @see #getBaseDirection
655     * @stable ICU 3.8
656     */
657    public static final byte LTR = 0;
658
659    /**
660     * Right-to-left text.
661     * <ul>
662     * <li>As return value for <code>getDirection()</code>, it means
663     *     that the source string contains no left-to-right characters, or
664     *     that the source string is empty and the paragraph level is odd.
665     * <li>As return value for <code>getBaseDirection()</code>, it
666     *     means that the first strong character of the source string has
667     *     a right-to-left direction.
668     * </ul>
     * @see #getDirection
     * @see #getBaseDirection
669     * @stable ICU 3.8
670     */
671    public static final byte RTL = 1;
672
673    /**
674     * Mixed-directional text.
675     * <p>As return value for <code>getDirection()</code>, it means
676     *    that the source string contains both left-to-right and
677     *    right-to-left characters.
     * @see #getDirection
678     * @stable ICU 3.8
679     */
680    public static final byte MIXED = 2;
681
682    /**
683     * No strongly directional text.
684     * <p>As return value for <code>getBaseDirection()</code>, it means
685     *    that the source string is missing or empty, or contains neither
686     *    left-to-right nor right-to-left characters.
     * @see #getBaseDirection
687     * @stable ICU 4.6
688     */
689    public static final byte NEUTRAL = 3;
690
691    /**
692     * Option bit for writeReordered():
693     * keep combining characters after their base characters in RTL runs.
694     *
     * <p>Each writeReordered() option constant is a distinct single bit
     * (1, 2, 4, 8, 16).
     *
695     * @see #writeReordered
696     * @stable ICU 3.8
697     */
698    public static final short KEEP_BASE_COMBINING = 1;
699
700    /**
701     * Option bit for writeReordered():
702     * replace characters with the "mirrored" property in RTL runs
703     * by their mirror-image mappings.
704     *
705     * @see #writeReordered
706     * @stable ICU 3.8
707     */
708    public static final short DO_MIRRORING = 2;
709
710    /**
711     * Option bit for writeReordered():
712     * surround the run with LRMs if necessary;
713     * this is part of the approximate "inverse Bidi" algorithm.
714     *
715     * <p>This option does not imply corresponding adjustment of the index
716     * mappings.
717     *
718     * @see #setInverse
719     * @see #writeReordered
720     * @stable ICU 3.8
721     */
722    public static final short INSERT_LRM_FOR_NUMERIC = 4;
723
724    /**
725     * Option bit for writeReordered():
726     * remove Bidi control characters
727     * (this does not affect INSERT_LRM_FOR_NUMERIC).
728     *
729     * <p>This option does not imply corresponding adjustment of the index
730     * mappings.
731     *
732     * @see #writeReordered
733     * @see #INSERT_LRM_FOR_NUMERIC
734     * @stable ICU 3.8
735     */
736    public static final short REMOVE_BIDI_CONTROLS = 8;
737
738    /**
739     * Option bit for writeReordered():
740     * write the output in reverse order.
741     *
742     * <p>This has the same effect as calling <code>writeReordered()</code>
743     * first without this option, and then calling
744     * <code>writeReverse()</code> without mirroring.
745     * Doing this in the same step is faster and avoids a temporary buffer.
746     * An example for using this option is output to a character terminal that
747     * is designed for RTL scripts and stores text in reverse order.
748     *
749     * @see #writeReordered
     * @see #writeReverse
750     * @stable ICU 3.8
751     */
752    public static final short OUTPUT_REVERSE = 16;
753
754    /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
755     * @see #setReorderingMode
756     * @stable ICU 3.8
757     */
758    public static final short REORDER_DEFAULT = 0;
759
760    /** Reordering mode: Logical to Visual algorithm which handles numbers in
761     * a way which mimicks the behavior of Windows XP.
762     * @see #setReorderingMode
763     * @stable ICU 3.8
764     */
765    public static final short REORDER_NUMBERS_SPECIAL = 1;
766
767    /** Reordering mode: Logical to Visual algorithm grouping numbers with
768     * adjacent R characters (reversible algorithm).
769     * @see #setReorderingMode
770     * @stable ICU 3.8
771     */
772    public static final short REORDER_GROUP_NUMBERS_WITH_R = 2;
773
774    /** Reordering mode: Reorder runs only to transform a Logical LTR string
775     * to the logical RTL string with the same display, or vice-versa.<br>
776     * If this mode is set together with option
777     * <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source
778     * text may be removed and other controls may be added to produce the
779     * minimum combination which has the required display.
780     * @see #OPTION_INSERT_MARKS
781     * @see #setReorderingMode
782     * @stable ICU 3.8
783     */
784    public static final short REORDER_RUNS_ONLY = 3;
785
786    /** Reordering mode: Visual to Logical algorithm which handles numbers
     * like L (same algorithm as selected by <code>setInverse(true)</code>).
788     * @see #setInverse
789     * @see #setReorderingMode
790     * @stable ICU 3.8
791     */
792    public static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
793
794    /** Reordering mode: Visual to Logical algorithm equivalent to the regular
795     * Logical to Visual algorithm.
796     * @see #setReorderingMode
797     * @stable ICU 3.8
798     */
799    public static final short REORDER_INVERSE_LIKE_DIRECT = 5;
800
801    /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
802     * <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
803     * @see #setReorderingMode
804     * @stable ICU 3.8
805     */
806    public static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
807
808    /*  Number of values for reordering mode. */
809    static final short REORDER_COUNT = 7;
810
811    /* Reordering mode values must be ordered so that all the regular logical to
812     * visual modes come first, and all inverse Bidi modes come last.
813     */
814    static final short REORDER_LAST_LOGICAL_TO_VISUAL =
815            REORDER_NUMBERS_SPECIAL;
816
817    /**
818     * Option value for <code>setReorderingOptions</code>:
819     * disable all the options which can be set with this method
820     * @see #setReorderingOptions
821     * @stable ICU 3.8
822     */
823    public static final int OPTION_DEFAULT = 0;
824
825    /**
826     * Option bit for <code>setReorderingOptions</code>:
827     * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
828     * a reordering to a Logical order
829     *
830     * <p>This option must be set or reset before calling
831     * <code>setPara</code>.
832     *
833     * <p>This option is significant only with reordering modes which generate
     * a result with Logical order, specifically:
835     * <ul>
836     *   <li><code>REORDER_RUNS_ONLY</code></li>
837     *   <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li>
838     *   <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li>
839     *   <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
840     * </ul>
841     *
842     * <p>If this option is set in conjunction with reordering mode
843     * <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
844     * <code>setInverse(true)</code>, it implies option
845     * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
846     * <code>writeReordered()</code>.
847     *
848     * <p>For other reordering modes, a minimum number of LRM or RLM characters
849     * will be added to the source text after reordering it so as to ensure
850     * round trip, i.e. when applying the inverse reordering mode on the
851     * resulting logical text with removal of Bidi marks
852     * (option <code>OPTION_REMOVE_CONTROLS</code> set before calling
853     * <code>setPara()</code> or option
854     * <code>REMOVE_BIDI_CONTROLS</code> in
855     * <code>writeReordered</code>), the result will be identical to the
856     * source text in the first transformation.
857     *
858     * <p>This option will be ignored if specified together with option
859     * <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option
860     * <code>REMOVE_BIDI_CONTROLS</code> in calls to method
861     * <code>writeReordered()</code> and it implies option
862     * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
863     * <code>writeReordered()</code> if the reordering mode is
864     * <code>REORDER_INVERSE_NUMBERS_AS_L</code>.
865     *
866     * @see #setReorderingMode
867     * @see #setReorderingOptions
868     * @see #INSERT_LRM_FOR_NUMERIC
869     * @see #REMOVE_BIDI_CONTROLS
870     * @see #OPTION_REMOVE_CONTROLS
871     * @see #REORDER_RUNS_ONLY
872     * @see #REORDER_INVERSE_NUMBERS_AS_L
873     * @see #REORDER_INVERSE_LIKE_DIRECT
874     * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
875     * @stable ICU 3.8
876     */
877    public static final int OPTION_INSERT_MARKS = 1;
878
879    /**
880     * Option bit for <code>setReorderingOptions</code>:
881     * remove Bidi control characters
882     *
883     * <p>This option must be set or reset before calling
884     * <code>setPara</code>.
885     *
886     * <p>This option nullifies option
887     * <code>OPTION_INSERT_MARKS</code>. It inhibits option
888     * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
889     * <code>writeReordered()</code> and it implies option
890     * <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.
891     *
892     * @see #setReorderingMode
893     * @see #setReorderingOptions
894     * @see #OPTION_INSERT_MARKS
895     * @see #INSERT_LRM_FOR_NUMERIC
896     * @see #REMOVE_BIDI_CONTROLS
897     * @stable ICU 3.8
898     */
899    public static final int OPTION_REMOVE_CONTROLS = 2;
900
901    /**
902     * Option bit for <code>setReorderingOptions</code>:
903     * process the output as part of a stream to be continued
904     *
905     * <p>This option must be set or reset before calling
906     * <code>setPara</code>.
907     *
908     * <p>This option specifies that the caller is interested in processing
     * a large text object in parts. The results of the successive calls are
910     * expected to be concatenated by the caller. Only the call for the last
911     * part will have this option bit off.
912     *
913     * <p>When this option bit is on, <code>setPara()</code> may process
914     * less than the full source text in order to truncate the text at a
915     * meaningful boundary. The caller should call
916     * <code>getProcessedLength()</code> immediately after calling
917     * <code>setPara()</code> in order to determine how much of the source
918     * text has been processed. Source text beyond that length should be
919     * resubmitted in following calls to <code>setPara</code>. The
920     * processed length may be less than the length of the source text if a
921     * character preceding the last character of the source text constitutes a
922     * reasonable boundary (like a block separator) for text to be continued.<br>
923     * If the last character of the source text constitutes a reasonable
924     * boundary, the whole text will be processed at once.<br>
925     * If nowhere in the source text there exists
926     * such a reasonable boundary, the processed length will be zero.<br>
927     * The caller should check for such an occurrence and do one of the following:
928     * <ul><li>submit a larger amount of text with a better chance to include
929     *         a reasonable boundary.</li>
930     *     <li>resubmit the same text after turning off option
931     *         <code>OPTION_STREAMING</code>.</li></ul>
932     * In all cases, this option should be turned off before processing the last
933     * part of the text.
934     *
935     * <p>When the <code>OPTION_STREAMING</code> option is used, it is
936     * recommended to call <code>orderParagraphsLTR(true)</code> before calling
937     * <code>setPara()</code> so that later paragraphs may be concatenated to
938     * previous paragraphs on the right.
939     *
940     * @see #setReorderingMode
941     * @see #setReorderingOptions
942     * @see #getProcessedLength
943     * @stable ICU 3.8
944     */
945    public static final int OPTION_STREAMING = 4;
946
947    /*
948     *   Comparing the description of the Bidi algorithm with this implementation
949     *   is easier with the same names for the Bidi types in the code as there.
950     *   See UCharacterDirection
951     */
952    static final byte L   = UCharacterDirection.LEFT_TO_RIGHT;                  /*  0 */
953    static final byte R   = UCharacterDirection.RIGHT_TO_LEFT;                  /*  1 */
954    static final byte EN  = UCharacterDirection.EUROPEAN_NUMBER;                /*  2 */
955    static final byte ES  = UCharacterDirection.EUROPEAN_NUMBER_SEPARATOR;      /*  3 */
956    static final byte ET  = UCharacterDirection.EUROPEAN_NUMBER_TERMINATOR;     /*  4 */
957    static final byte AN  = UCharacterDirection.ARABIC_NUMBER;                  /*  5 */
958    static final byte CS  = UCharacterDirection.COMMON_NUMBER_SEPARATOR;        /*  6 */
959    static final byte B   = UCharacterDirection.BLOCK_SEPARATOR;                /*  7 */
960    static final byte S   = UCharacterDirection.SEGMENT_SEPARATOR;              /*  8 */
961    static final byte WS  = UCharacterDirection.WHITE_SPACE_NEUTRAL;            /*  9 */
962    static final byte ON  = UCharacterDirection.OTHER_NEUTRAL;                  /* 10 */
963    static final byte LRE = UCharacterDirection.LEFT_TO_RIGHT_EMBEDDING;        /* 11 */
964    static final byte LRO = UCharacterDirection.LEFT_TO_RIGHT_OVERRIDE;         /* 12 */
965    static final byte AL  = UCharacterDirection.RIGHT_TO_LEFT_ARABIC;           /* 13 */
966    static final byte RLE = UCharacterDirection.RIGHT_TO_LEFT_EMBEDDING;        /* 14 */
967    static final byte RLO = UCharacterDirection.RIGHT_TO_LEFT_OVERRIDE;         /* 15 */
968    static final byte PDF = UCharacterDirection.POP_DIRECTIONAL_FORMAT;         /* 16 */
969    static final byte NSM = UCharacterDirection.DIR_NON_SPACING_MARK;           /* 17 */
970    static final byte BN  = UCharacterDirection.BOUNDARY_NEUTRAL;               /* 18 */
971    static final byte FSI = UCharacterDirection.FIRST_STRONG_ISOLATE;           /* 19 */
972    static final byte LRI = UCharacterDirection.LEFT_TO_RIGHT_ISOLATE;          /* 20 */
973    static final byte RLI = UCharacterDirection.RIGHT_TO_LEFT_ISOLATE;          /* 21 */
974    static final byte PDI = UCharacterDirection.POP_DIRECTIONAL_ISOLATE;        /* 22 */
975    static final byte ENL = PDI + 1;    /* EN after W7 */                       /* 23 */
976    static final byte ENR = ENL + 1;    /* EN not subject to W7 */              /* 24 */
977
978    /**
979     * Value returned by <code>BidiClassifier</code> when there is no need to
980     * override the standard Bidi class for a given code point.
981     *
982     * <p>This constant is deprecated; use UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)+1 instead.
983     *
984     * @see BidiClassifier
985     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
986     */
987    @Deprecated
988    public static final int CLASS_DEFAULT = UCharacterDirection.CHAR_DIRECTION_COUNT;
989
990    /* number of paras entries allocated initially */
991    static final int SIMPLE_PARAS_COUNT = 10;
992    /* number of isolate run entries for paired brackets allocated initially */
993    static final int SIMPLE_OPENINGS_COUNT = 20;
994
995    private static final char CR = '\r';
996    private static final char LF = '\n';
997
998    static final int LRM_BEFORE = 1;
999    static final int LRM_AFTER = 2;
1000    static final int RLM_BEFORE = 4;
1001    static final int RLM_AFTER = 8;
1002
1003    /* flags for Opening.flags */
1004    static final byte FOUND_L = (byte)DirPropFlag(L);
1005    static final byte FOUND_R = (byte)DirPropFlag(R);
1006
1007    /*
1008     * The following bit is used for the directional isolate status.
1009     * Stack entries corresponding to isolate sequences are greater than ISOLATE.
1010     */
1011    static final int ISOLATE = 0x0100;
1012
1013
1014    /*
1015     * reference to parent paragraph object (reference to self if this object is
1016     * a paragraph object); set to null in a newly opened object; set to a
1017     * real value after a successful execution of setPara or setLine
1018     */
1019    Bidi                paraBidi;
1020
1021    final UBiDiProps    bdp;
1022
1023    /* character array representing the current text */
1024    char[]              text;
1025
1026    /* length of the current text */
1027    int                 originalLength;
1028
1029    /* if the option OPTION_STREAMING is set, this is the length of
1030     * text actually processed by <code>setPara</code>, which may be shorter
1031     * than the original length. Otherwise, it is identical to the original
1032     * length.
1033     */
1034    int                 length;
1035
1036    /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi
1037     * marks are allowed to be inserted in one of the reordering modes, the
1038     * length of the result string may be different from the processed length.
1039     */
1040    int                 resultLength;
1041
1042    /* indicators for whether memory may be allocated after construction */
1043    boolean             mayAllocateText;
1044    boolean             mayAllocateRuns;
1045
1046    /* arrays with one value per text-character */
1047    byte[]              dirPropsMemory = new byte[1];
1048    byte[]              levelsMemory = new byte[1];
1049    byte[]              dirProps;
1050    byte[]              levels;
1051
1052    /* are we performing an approximation of the "inverse Bidi" algorithm? */
1053    boolean             isInverse;
1054
1055    /* are we using the basic algorithm or its variation? */
1056    int                 reorderingMode;
1057
1058    /* bitmask for reordering options */
1059    int                 reorderingOptions;
1060
1061    /* must block separators receive level 0? */
1062    boolean             orderParagraphsLTR;
1063
1064    /* the paragraph level */
1065    byte                paraLevel;
1066    /* original paraLevel when contextual */
1067    /* must be one of DEFAULT_xxx or 0 if not contextual */
1068    byte                defaultParaLevel;
1069
1070    /* context data */
1071    String              prologue;
1072    String              epilogue;
1073
1074    /* the following is set in setPara, used in processPropertySeq */
1075
1076    ImpTabPair          impTabPair;  /* reference to levels state table pair */
1077    /* the overall paragraph or line directionality*/
1078    byte                direction;
1079
1080    /* flags is a bit set for which directional properties are in the text */
1081    int                 flags;
1082
1083    /* lastArabicPos is index to the last AL in the text, -1 if none */
1084    int                 lastArabicPos;
1085
1086    /* characters after trailingWSStart are WS and are */
1087    /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
1088    int                 trailingWSStart;
1089
1090    /* fields for paragraph handling, set in getDirProps() */
1091    int                 paraCount;
1092    int[]               paras_limit = new int[SIMPLE_PARAS_COUNT];
1093    byte[]              paras_level = new byte[SIMPLE_PARAS_COUNT];
1094
1095    /* fields for line reordering */
1096    int                 runCount;     /* ==-1: runs not set up yet */
1097    BidiRun[]           runsMemory = new BidiRun[0];
1098    BidiRun[]           runs;
1099
1100    /* for non-mixed text, we only need a tiny array of runs (no allocation) */
1101    BidiRun[]           simpleRuns = {new BidiRun()};
1102
1103    /* fields for managing isolate sequences */
1104    Isolate[]           isolates;
1105    /* maximum or current nesting depth of isolate sequences */
1106    /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
1107       nesting encountered.
1108       Within resolveImplicitLevels(), this is the index of the current isolates
1109       stack entry. */
1110    int                 isolateCount;
1111
1112    /* mapping of runs in logical order to visual order */
1113    int[]               logicalToVisualRunsMap;
1114    /* flag to indicate that the map has been updated */
1115    boolean             isGoodLogicalToVisualRunsMap;
1116
1117    /* customized class provider */
1118    BidiClassifier      customClassifier = null;
1119
1120    /* for inverse Bidi with insertion of directional marks */
1121    InsertPoints        insertPoints = new InsertPoints();
1122
1123    /* for option OPTION_REMOVE_CONTROLS */
1124    int                 controlCount;
1125
1126    /*
1127     * Sometimes, bit values are more appropriate
1128     * to deal with directionality properties.
1129     * Abbreviations in these method names refer to names
1130     * used in the Bidi algorithm.
1131     */
1132    static int DirPropFlag(byte dir) {
1133        return (1 << dir);
1134    }
1135
1136    boolean testDirPropFlagAt(int flag, int index) {
1137        return ((DirPropFlag(dirProps[index]) & flag) != 0);
1138    }
1139
1140    static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
1141
1142    /* to avoid some conditional statements, use tiny constant arrays */
1143    static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };
1144    static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };
1145    static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
1146
1147    static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
1148    static final int DirPropFlagE(byte level)  { return DirPropFlagE[level & 1]; }
1149    static final int DirPropFlagO(byte level)  { return DirPropFlagO[level & 1]; }
1150    static final byte DirFromStrong(byte strong) { return strong == L ? L : R; }
1151    static final byte NoOverride(byte level) { return (byte)(level & ~LEVEL_OVERRIDE); }
1152
1153    /*  are there any characters that are LTR or RTL? */
1154    static final int MASK_LTR =
1155        DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(ENL)|DirPropFlag(ENR)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(LRI);
1156    static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(RLI);
1157
1158    static final int MASK_R_AL = DirPropFlag(R)|DirPropFlag(AL);
1159    static final int MASK_STRONG_EN_AN = DirPropFlag(L)|DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(EN)|DirPropFlag(AN);
1160    /* explicit embedding codes */
1161    static final int MASK_EXPLICIT = DirPropFlag(LRE)|DirPropFlag(LRO)|DirPropFlag(RLE)|DirPropFlag(RLO)|DirPropFlag(PDF);
1162    static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
1163
1164    /* explicit isolate codes */
1165    static final int MASK_ISO = DirPropFlag(LRI)|DirPropFlag(RLI)|DirPropFlag(FSI)|DirPropFlag(PDI);
1166
1167    /* paragraph and segment separators */
1168    static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);
1169
1170    /* all types that are counted as White Space or Neutral in some steps */
1171    static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT|MASK_ISO;
1172
    /* types that are neutrals or could become neutrals in (Wn) */
1174    static final int MASK_POSSIBLE_N = DirPropFlag(ON)|DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_WS;
1175
1176    /*
1177     * These types may be changed to "e",
1178     * the embedding type (L or R) of the run,
1179     * in the Bidi algorithm (N2)
1180     */
1181    static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;
1182
1183    /*
1184     *  the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java
1185     */
1186    static byte GetLRFromLevel(byte level)
1187    {
1188        return (byte)(level & 1);
1189    }
1190
1191    static boolean IsDefaultLevel(byte level)
1192    {
1193        return ((level & LEVEL_DEFAULT_LTR) == LEVEL_DEFAULT_LTR);
1194    }
1195
1196    static boolean IsBidiControlChar(int c)
1197    {
1198        /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
1199                           0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
1200        return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))
1201                                             || ((c >= 0x2066) && (c <= 0x2069)));
1202    }
1203
1204    void verifyValidPara()
1205    {
1206        if (!(this == this.paraBidi)) {
1207            throw new IllegalStateException();
1208        }
1209    }
1210
1211    void verifyValidParaOrLine()
1212    {
1213        Bidi para = this.paraBidi;
1214        /* verify Para */
1215        if (this == para) {
1216            return;
1217        }
1218        /* verify Line */
1219        if ((para == null) || (para != para.paraBidi)) {
1220            throw new IllegalStateException();
1221        }
1222    }
1223
1224    void verifyRange(int index, int start, int limit)
1225    {
1226        if (index < start || index >= limit) {
1227            throw new IllegalArgumentException("Value " + index +
1228                      " is out of range " + start + " to " + limit);
1229        }
1230    }
1231
1232    /**
1233     * Allocate a <code>Bidi</code> object.
1234     * Such an object is initially empty. It is assigned
1235     * the Bidi properties of a piece of text containing one or more paragraphs
1236     * by <code>setPara()</code>
1237     * or the Bidi properties of a line within a paragraph by
1238     * <code>setLine()</code>.<p>
1239     * This object can be reused.<p>
1240     * <code>setPara()</code> and <code>setLine()</code> will allocate
1241     * additional memory for internal structures as necessary.
1242     *
1243     * @stable ICU 3.8
1244     */
    public Bidi()
    {
        /* both sizings are 0: no preallocation is requested from the two-argument constructor */
        this(0, 0);
    }
1249
1250    /**
1251     * Allocate a <code>Bidi</code> object with preallocated memory
1252     * for internal structures.
1253     * This method provides a <code>Bidi</code> object like the default constructor
1254     * but it also preallocates memory for internal structures
1255     * according to the sizings supplied by the caller.<p>
1256     * The preallocation can be limited to some of the internal memory
1257     * by setting some values to 0 here. That means that if, e.g.,
1258     * <code>maxRunCount</code> cannot be reasonably predetermined and should not
1259     * be set to <code>maxLength</code> (the only failproof value) to avoid
1260     * wasting  memory, then <code>maxRunCount</code> could be set to 0 here
1261     * and the internal structures that are associated with it will be allocated
1262     * on demand, just like with the default constructor.
1263     *
1264     * @param maxLength is the maximum text or line length that internal memory
1265     *        will be preallocated for. An attempt to associate this object with a
1266     *        longer text will fail, unless this value is 0, which leaves the allocation
1267     *        up to the implementation.
1268     *
1269     * @param maxRunCount is the maximum anticipated number of same-level runs
1270     *        that internal memory will be preallocated for. An attempt to access
1271     *        visual runs on an object that was not preallocated for as many runs
1272     *        as the text was actually resolved to will fail,
1273     *        unless this value is 0, which leaves the allocation up to the implementation.<br><br>
     *        The number of runs depends on the actual text and may be anywhere between
1275     *        1 and <code>maxLength</code>. It is typically small.
1276     *
1277     * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0
1278     * @stable ICU 3.8
1279     */
1280    public Bidi(int maxLength, int maxRunCount)
1281    {
1282        /* check the argument values */
1283        if (maxLength < 0 || maxRunCount < 0) {
1284            throw new IllegalArgumentException();
1285        }
1286
1287        /* reset the object, all reference variables null, all flags false,
1288           all sizes 0.
1289           In fact, we don't need to do anything, since class members are
1290           initialized as zero when an instance is created.
1291         */
1292        /*
1293        mayAllocateText = false;
1294        mayAllocateRuns = false;
1295        orderParagraphsLTR = false;
1296        paraCount = 0;
1297        runCount = 0;
1298        trailingWSStart = 0;
1299        flags = 0;
1300        paraLevel = 0;
1301        defaultParaLevel = 0;
1302        direction = 0;
1303        */
1304        /* get Bidi properties */
1305        bdp = UBiDiProps.INSTANCE;
1306
1307        /* allocate memory for arrays as requested */
1308        if (maxLength > 0) {
1309            getInitialDirPropsMemory(maxLength);
1310            getInitialLevelsMemory(maxLength);
1311        } else {
1312            mayAllocateText = true;
1313        }
1314
1315        if (maxRunCount > 0) {
1316            // if maxRunCount == 1, use simpleRuns[]
1317            if (maxRunCount > 1) {
1318                getInitialRunsMemory(maxRunCount);
1319            }
1320        } else {
1321            mayAllocateRuns = true;
1322        }
1323    }
1324
1325    /*
1326     * We are allowed to allocate memory if object==null or
1327     * mayAllocate==true for each array that we need.
1328     *
1329     * Assume sizeNeeded>0.
1330     * If object != null, then assume size > 0.
1331     */
1332    private Object getMemory(String label, Object array, Class<?> arrayClass,
1333            boolean mayAllocate, int sizeNeeded)
1334    {
1335        int len = Array.getLength(array);
1336
1337        /* we have at least enough memory and must not allocate */
1338        if (sizeNeeded == len) {
1339            return array;
1340        }
1341        if (!mayAllocate) {
1342            /* we must not allocate */
1343            if (sizeNeeded <= len) {
1344                return array;
1345            }
1346            throw new OutOfMemoryError("Failed to allocate memory for "
1347                                       + label);
1348        }
1349        /* we may try to grow or shrink */
1350        /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid
1351           the allocation altogether and rely on this.length */
1352        try {
1353            return Array.newInstance(arrayClass, sizeNeeded);
1354        } catch (Exception e) {
1355            throw new OutOfMemoryError("Failed to allocate memory for "
1356                                       + label);
1357        }
1358    }
1359
1360    /* helper methods for each allocated array */
1361    private void getDirPropsMemory(boolean mayAllocate, int len)
1362    {
1363        Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len);
1364        dirPropsMemory = (byte[]) array;
1365    }
1366
1367    void getDirPropsMemory(int len)
1368    {
1369        getDirPropsMemory(mayAllocateText, len);
1370    }
1371
1372    private void getLevelsMemory(boolean mayAllocate, int len)
1373    {
1374        Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len);
1375        levelsMemory = (byte[]) array;
1376    }
1377
1378    void getLevelsMemory(int len)
1379    {
1380        getLevelsMemory(mayAllocateText, len);
1381    }
1382
1383    private void getRunsMemory(boolean mayAllocate, int len)
1384    {
1385        Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len);
1386        runsMemory = (BidiRun[]) array;
1387    }
1388
1389    void getRunsMemory(int len)
1390    {
1391        getRunsMemory(mayAllocateRuns, len);
1392    }
1393
1394    /* additional methods used by constructor - always allow allocation */
1395    private void getInitialDirPropsMemory(int len)
1396    {
1397        getDirPropsMemory(true, len);
1398    }
1399
1400    private void getInitialLevelsMemory(int len)
1401    {
1402        getLevelsMemory(true, len);
1403    }
1404
1405    private void getInitialRunsMemory(int len)
1406    {
1407        getRunsMemory(true, len);
1408    }
1409
1410    /**
1411     * Modify the operation of the Bidi algorithm such that it
1412     * approximates an "inverse Bidi" algorithm. This method
1413     * must be called before <code>setPara()</code>.
1414     *
1415     * <p>The normal operation of the Bidi algorithm as described
1416     * in the Unicode Technical Report is to take text stored in logical
1417     * (keyboard, typing) order and to determine the reordering of it for visual
1418     * rendering.
1419     * Some legacy systems store text in visual order, and for operations
1420     * with standard, Unicode-based algorithms, the text needs to be transformed
1421     * to logical order. This is effectively the inverse algorithm of the
1422     * described Bidi algorithm. Note that there is no standard algorithm for
1423     * this "inverse Bidi" and that the current implementation provides only an
1424     * approximation of "inverse Bidi".
1425     *
     * <p>With <code>isInverse</code> set to <code>true</code>,
1427     * this method changes the behavior of some of the subsequent methods
1428     * in a way that they can be used for the inverse Bidi algorithm.
1429     * Specifically, runs of text with numeric characters will be treated in a
1430     * special way and may need to be surrounded with LRM characters when they are
1431     * written in reordered sequence.
1432     *
1433     * <p>Output runs should be retrieved using <code>getVisualRun()</code>.
1434     * Since the actual input for "inverse Bidi" is visually ordered text and
1435     * <code>getVisualRun()</code> gets the reordered runs, these are actually
1436     * the runs of the logically ordered output.
1437     *
1438     * <p>Calling this method with argument <code>isInverse</code> set to
1439     * <code>true</code> is equivalent to calling <code>setReorderingMode</code>
1440     * with argument <code>reorderingMode</code>
1441     * set to <code>REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
1442     * Calling this method with argument <code>isInverse</code> set to
1443     * <code>false</code> is equivalent to calling <code>setReorderingMode</code>
1444     * with argument <code>reorderingMode</code>
1445     * set to <code>REORDER_DEFAULT</code>.
1446     *
1447     * @param isInverse specifies "forward" or "inverse" Bidi operation.
1448     *
1449     * @see #setPara
1450     * @see #writeReordered
1451     * @see #setReorderingMode
1452     * @see #REORDER_INVERSE_NUMBERS_AS_L
1453     * @see #REORDER_DEFAULT
1454     * @stable ICU 3.8
1455     */
1456    public void setInverse(boolean isInverse) {
1457        this.isInverse = (isInverse);
1458        this.reorderingMode = isInverse ? REORDER_INVERSE_NUMBERS_AS_L
1459                : REORDER_DEFAULT;
1460    }
1461
1462    /**
1463     * Is this <code>Bidi</code> object set to perform the inverse Bidi
1464     * algorithm?
1465     * <p>Note: calling this method after setting the reordering mode with
1466     * <code>setReorderingMode</code> will return <code>true</code> if the
1467     * reordering mode was set to
1468     * <code>REORDER_INVERSE_NUMBERS_AS_L</code>, <code>false</code>
1469     * for all other values.
1470     *
1471     * @return <code>true</code> if the <code>Bidi</code> object is set to
1472     * perform the inverse Bidi algorithm by handling numbers as L.
1473     *
1474     * @see #setInverse
1475     * @see #setReorderingMode
1476     * @see #REORDER_INVERSE_NUMBERS_AS_L
1477     * @stable ICU 3.8
1478     */
1479    public boolean isInverse() {
1480        return isInverse;
1481    }
1482
1483    /**
1484     * Modify the operation of the Bidi algorithm such that it implements some
1485     * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
1486     * algorithm, depending on different values of the "reordering mode".
1487     * This method must be called before <code>setPara()</code>, and stays in
1488     * effect until called again with a different argument.
1489     *
1490     * <p>The normal operation of the Bidi algorithm as described in the Unicode
1491     * Standard Annex #9 is to take text stored in logical (keyboard, typing)
1492     * order and to determine how to reorder it for visual rendering.
1493     *
1494     * <p>With the reordering mode set to a value other than
1495     * <code>REORDER_DEFAULT</code>, this method changes the behavior of some of
1496     * the subsequent methods in a way such that they implement an inverse Bidi
1497     * algorithm or some other algorithm variants.
1498     *
1499     * <p>Some legacy systems store text in visual order, and for operations
1500     * with standard, Unicode-based algorithms, the text needs to be transformed
1501     * into logical order. This is effectively the inverse algorithm of the
1502     * described Bidi algorithm. Note that there is no standard algorithm for
1503     * this "inverse Bidi", so a number of variants are implemented here.
1504     *
1505     * <p>In other cases, it may be desirable to emulate some variant of the
1506     * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
1507     * Logical to Logical transformation.
1508     *
1509     * <ul>
1510     * <li>When the Reordering Mode is set to
1511     * <code>REORDER_DEFAULT</code>,
1512     * the standard Bidi Logical to Visual algorithm is applied.</li>
1513     *
1514     * <li>When the reordering mode is set to
1515     * <code>REORDER_NUMBERS_SPECIAL</code>,
1516     * the algorithm used to perform Bidi transformations when calling
1517     * <code>setPara</code> should approximate the algorithm used in Microsoft
1518     * Windows XP rather than strictly conform to the Unicode Bidi algorithm.
1519     * <br>
1520     * The differences between the basic algorithm and the algorithm addressed
1521     * by this option are as follows:
1522     * <ul>
1523     *   <li>Within text at an even embedding level, the sequence "123AB"
1524     *   (where AB represent R or AL letters) is transformed to "123BA" by the
1525     *   Unicode algorithm and to "BA123" by the Windows algorithm.</li>
1526     *
1527     *   <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
1528     *   like regular numbers (EN).</li>
1529     * </ul></li>
1530     *
1531     * <li>When the reordering mode is set to
1532     * <code>REORDER_GROUP_NUMBERS_WITH_R</code>,
1533     * numbers located between LTR text and RTL text are associated with the RTL
1534     * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
1535     * upper case letters represent RTL characters) will be transformed to
1536     * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
1537     * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
1538     * This makes the algorithm reversible and makes it useful when round trip
1539     * (from visual to logical and back to visual) must be achieved without
1540     * adding LRM characters. However, this is a variation from the standard
1541     * Unicode Bidi algorithm.<br>
1542     * The source text should not contain Bidi control characters other than LRM
1543     * or RLM.</li>
1544     *
1545     * <li>When the reordering mode is set to
1546     * <code>REORDER_RUNS_ONLY</code>,
1547     * a "Logical to Logical" transformation must be performed:
1548     * <ul>
1549     * <li>If the default text level of the source text (argument
1550     * <code>paraLevel</code> in <code>setPara</code>) is even, the source text
1551     * will be handled as LTR logical text and will be transformed to the RTL
1552     * logical text which has the same LTR visual display.</li>
1553     * <li>If the default level of the source text is odd, the source text
1554     * will be handled as RTL logical text and will be transformed to the
1555     * LTR logical text which has the same LTR visual display.</li>
1556     * </ul>
1557     * This mode may be needed when logical text which is basically Arabic or
1558     * Hebrew, with possible included numbers or phrases in English, has to be
1559     * displayed as if it had an even embedding level (this can happen if the
1560     * displaying application treats all text as if it was basically LTR).
1561     * <br>
1562     * This mode may also be needed in the reverse case, when logical text which
1563     * is basically English, with possible included phrases in Arabic or Hebrew,
1564     * has to be displayed as if it had an odd embedding level.
1565     * <br>
1566     * Both cases could be handled by adding LRE or RLE at the head of the
1567     * text, if the display subsystem supports these formatting controls. If it
1568     * does not, the problem may be handled by transforming the source text in
1569     * this mode before displaying it, so that it will be displayed properly.
1570     * <br>
1571     * The source text should not contain Bidi control characters other than LRM
1572     * or RLM.</li>
1573     *
1574     * <li>When the reordering mode is set to
1575     * <code>REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi"
1576     * algorithm is applied.
1577     * Runs of text with numeric characters will be treated like LTR letters and
1578     * may need to be surrounded with LRM characters when they are written in
1579     * reordered sequence (the option <code>INSERT_LRM_FOR_NUMERIC</code> can
     * be used with method <code>writeReordered</code> to this end). This mode
1581     * is equivalent to calling <code>setInverse()</code> with
1582     * argument <code>isInverse</code> set to <code>true</code>.</li>
1583     *
1584     * <li>When the reordering mode is set to
1585     * <code>REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to
1586     * Visual Bidi algorithm is used as an approximation of an "inverse Bidi"
1587     * algorithm. This mode is similar to mode
1588     * <code>REORDER_INVERSE_NUMBERS_AS_L</code> but is closer to the
1589     * regular Bidi algorithm.
1590     * <br>
1591     * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
1592     * upper case represents RTL characters) will be transformed to
1593     * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
1594     * with mode <code>REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
1595     * When used in conjunction with option
1596     * <code>OPTION_INSERT_MARKS</code>, this mode generally
1597     * adds Bidi marks to the output significantly more sparingly than mode
     * <code>REORDER_INVERSE_NUMBERS_AS_L</code> with option
1599     * <code>INSERT_LRM_FOR_NUMERIC</code> in calls to
1600     * <code>writeReordered</code>.</li>
1601     *
1602     * <li>When the reordering mode is set to
1603     * <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
1604     * Bidi algorithm used in Windows XP is used as an approximation of an "inverse
1605     * Bidi" algorithm.
1606     * <br>
1607     * For example, an LTR paragraph with the content "abc FED123" (where
1608     * upper case represents RTL characters) will be transformed to
     * "abc 123DEF".</li>
1610     * </ul>
1611     *
1612     * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
1613     * (i.e. those with a name starting with <code>REORDER_INVERSE</code>),
1614     * output runs should be retrieved using <code>getVisualRun()</code>, and
1615     * the output text with <code>writeReordered()</code>. The caller should
1616     * keep in mind that in "inverse Bidi" modes the input is actually visually
1617     * ordered text and reordered output returned by <code>getVisualRun()</code>
1618     * or <code>writeReordered()</code> are actually runs or character string
1619     * of logically ordered output.<br>
1620     * For all the "inverse Bidi" modes, the source text should not contain
1621     * Bidi control characters other than LRM or RLM.
1622     *
1623     * <p>Note that option <code>OUTPUT_REVERSE</code> of
1624     * <code>writeReordered</code> has no useful meaning and should not be used
1625     * in conjunction with any value of the reordering mode specifying "inverse
1626     * Bidi" or with value <code>REORDER_RUNS_ONLY</code>.
1627     *
1628     * @param reorderingMode specifies the required variant of the Bidi
1629     *                       algorithm.
1630     *
1631     * @see #setInverse
1632     * @see #setPara
1633     * @see #writeReordered
1634     * @see #INSERT_LRM_FOR_NUMERIC
1635     * @see #OUTPUT_REVERSE
1636     * @see #REORDER_DEFAULT
1637     * @see #REORDER_NUMBERS_SPECIAL
1638     * @see #REORDER_GROUP_NUMBERS_WITH_R
1639     * @see #REORDER_RUNS_ONLY
1640     * @see #REORDER_INVERSE_NUMBERS_AS_L
1641     * @see #REORDER_INVERSE_LIKE_DIRECT
1642     * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
1643     * @stable ICU 3.8
1644     */
1645    public void setReorderingMode(int reorderingMode) {
1646        if ((reorderingMode < REORDER_DEFAULT) ||
1647            (reorderingMode >= REORDER_COUNT))
1648            return;                     /* don't accept a wrong value */
1649        this.reorderingMode = reorderingMode;
1650        this.isInverse =
1651            reorderingMode == REORDER_INVERSE_NUMBERS_AS_L;
1652    }
1653
1654    /**
1655     * What is the requested reordering mode for a given Bidi object?
1656     *
1657     * @return the current reordering mode of the Bidi object
1658     *
1659     * @see #setReorderingMode
1660     * @stable ICU 3.8
1661     */
1662    public int getReorderingMode() {
1663        return this.reorderingMode;
1664    }
1665
1666    /**
1667     * Specify which of the reordering options should be applied during Bidi
1668     * transformations.
1669     *
1670     * @param options A combination of zero or more of the following
1671     * reordering options:
1672     * <code>OPTION_DEFAULT</code>, <code>OPTION_INSERT_MARKS</code>,
1673     * <code>OPTION_REMOVE_CONTROLS</code>, <code>OPTION_STREAMING</code>.
1674     *
1675     * @see #getReorderingOptions
1676     * @see #OPTION_DEFAULT
1677     * @see #OPTION_INSERT_MARKS
1678     * @see #OPTION_REMOVE_CONTROLS
1679     * @see #OPTION_STREAMING
1680     * @stable ICU 3.8
1681     */
1682    public void setReorderingOptions(int options) {
1683        if ((options & OPTION_REMOVE_CONTROLS) != 0) {
1684            this.reorderingOptions = options & ~OPTION_INSERT_MARKS;
1685        } else {
1686            this.reorderingOptions = options;
1687        }
1688    }
1689
1690    /**
1691     * What are the reordering options applied to a given Bidi object?
1692     *
1693     * @return the current reordering options of the Bidi object
1694     *
1695     * @see #setReorderingOptions
1696     * @stable ICU 3.8
1697     */
1698    public int getReorderingOptions() {
1699        return this.reorderingOptions;
1700    }
1701
1702    /**
1703     * Get the base direction of the text provided according to the Unicode
1704     * Bidirectional Algorithm. The base direction is derived from the first
1705     * character in the string with bidirectional character type L, R, or AL.
1706     * If the first such character has type L, LTR is returned. If the first
1707     * such character has type R or AL, RTL is returned. If the string does
1708     * not contain any character of these types, then NEUTRAL is returned.
1709     * This is a lightweight function for use when only the base direction is
1710     * needed and no further bidi processing of the text is needed.
1711     * @param paragraph the text whose paragraph level direction is needed.
1712     * @return LTR, RTL, NEUTRAL
1713     * @see #LTR
1714     * @see #RTL
1715     * @see #NEUTRAL
1716     * @stable ICU 4.6
1717     */
1718    public static byte getBaseDirection(CharSequence paragraph) {
1719        if (paragraph == null || paragraph.length() == 0) {
1720            return NEUTRAL;
1721        }
1722
1723        int length = paragraph.length();
1724        int c;// codepoint
1725        byte direction;
1726
1727        for (int i = 0; i < length; ) {
1728            // U16_NEXT(paragraph, i, length, c) for C++
1729            c = UCharacter.codePointAt(paragraph, i);
1730            direction = UCharacter.getDirectionality(c);
1731            if (direction == UCharacterDirection.LEFT_TO_RIGHT) {
1732                return LTR;
1733            } else if (direction == UCharacterDirection.RIGHT_TO_LEFT
1734                || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) {
1735                return RTL;
1736            }
1737
1738            i = UCharacter.offsetByCodePoints(paragraph, i, 1);// set i to the head index of next codepoint
1739        }
1740        return NEUTRAL;
1741    }
1742
1743/* perform (P2)..(P3) ------------------------------------------------------- */
1744
1745    /**
1746     * Returns the directionality of the first strong character
1747     * after the last B in prologue, if any.
1748     * Requires prologue!=null.
1749     */
1750    private byte firstL_R_AL() {
1751        byte result = ON;
1752        for (int i = 0; i < prologue.length(); ) {
1753            int uchar = prologue.codePointAt(i);
1754            i += Character.charCount(uchar);
1755            byte dirProp = (byte)getCustomizedClass(uchar);
1756            if (result == ON) {
1757                if (dirProp == L || dirProp == R || dirProp == AL) {
1758                    result = dirProp;
1759                }
1760            } else {
1761                if (dirProp == B) {
1762                    result = ON;
1763                }
1764            }
1765        }
1766        return result;
1767    }
1768
1769    /*
1770     * Check that there are enough entries in the arrays paras_limit and paras_level
1771     */
1772    private void checkParaCount() {
1773        int[] saveLimits;
1774        byte[] saveLevels;
1775        int count = paraCount;
1776        if (count <= paras_level.length)
1777            return;
1778        int oldLength = paras_level.length;
1779        saveLimits = paras_limit;
1780        saveLevels = paras_level;
1781        try {
1782            paras_limit = new int[count * 2];
1783            paras_level = new byte[count * 2];
1784        } catch (Exception e) {
1785            throw new OutOfMemoryError("Failed to allocate memory for paras");
1786        }
1787        System.arraycopy(saveLimits, 0, paras_limit, 0, oldLength);
1788        System.arraycopy(saveLevels, 0, paras_level, 0, oldLength);
1789    }
1790
1791    /*
1792     * Get the directional properties for the text, calculate the flags bit-set, and
1793     * determine the paragraph level if necessary (in paras_level[i]).
1794     * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
1795     * When encountering an FSI, it is initially replaced with an LRI, which is the
1796     * default. Only if a strong R or AL is found within its scope will the LRI be
1797     * replaced by an RLI.
1798     */
    /* Values taken by the local variable "state" while getDirProps() scans the text.
       LOOKING_FOR_PDI is also entered directly after an LRI or RLI initiator. */
    static final int NOT_SEEKING_STRONG = 0;        /* 0: not contextual paraLevel, not after FSI */
    static final int SEEKING_STRONG_FOR_PARA = 1;   /* 1: looking for first strong char in para */
    static final int SEEKING_STRONG_FOR_FSI = 2;    /* 2: looking for first strong after FSI */
    static final int LOOKING_FOR_PDI = 3;           /* 3: found strong after FSI, looking for PDI */
1803
    /*
     * Single pass over text[0..originalLength), one code point at a time.
     * Results are written to fields of this object:
     *  - dirProps: directional class of each code point, stored at its last
     *    code unit (leading code units of a multi-unit code point get BN);
     *  - flags: union of DirPropFlag() of every class encountered, plus the
     *    paragraph level flags and, optionally, L (see end of method);
     *  - paras_limit / paras_level: limit and level of each paragraph, the
     *    arrays being grown through checkParaCount() as paragraphs are added;
     *  - paraLevel: replaced by paras_level[0] when a default level was requested;
     *  - lastArabicPos: index of the last AL character, or -1;
     *  - controlCount: number of Bidi control characters, counted only when
     *    OPTION_REMOVE_CONTROLS is set;
     *  - length: with OPTION_STREAMING, the index just past the last B.
     * NOTE(review): paraCount is read here but never initialized; presumably
     * the caller sets it to 1 before calling - confirm.
     */
    private void getDirProps()
    {
        int i = 0, i0, i1;
        flags = 0;          /* collect all directionalities in the text */
        int uchar;
        byte dirProp;
        /* local variable; it hides the field of the same name that GetParaLevelAt() reads */
        byte defaultParaLevel = 0;   /* initialize to avoid compiler warnings */
        boolean isDefaultLevel = IsDefaultLevel(paraLevel);
        /* for inverse Bidi, the default para level is set to RTL if there is a
           strong R or AL character at either end of the text                */
        boolean isDefaultLevelInverse=isDefaultLevel &&
                (reorderingMode == REORDER_INVERSE_LIKE_DIRECT ||
                 reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
        lastArabicPos = -1;
        int controlCount = 0;   /* local counter, copied to this.controlCount later */
        boolean removeBidiControls = (reorderingOptions & OPTION_REMOVE_CONTROLS) != 0;

        byte state;         /* one of NOT_SEEKING_STRONG .. LOOKING_FOR_PDI */
        byte lastStrong = ON;           /* for default level & inverse Bidi */
    /* The following stacks are used to manage isolate sequences. Those
       sequences may be nested, but obviously never more deeply than the
       maximum explicit embedding level.
       lastStack is the index of the last used entry in the stack. A value of -1
       means that there is no open isolate sequence.
       lastStack is reset to -1 on paragraph boundaries. */
    /* (the variable described above as "lastStack" is named stackLast in the code) */
    /* The following stack contains the position of the initiator of
       each open isolate sequence */
        int[] isolateStartStack= new int[MAX_EXPLICIT_LEVEL+1];
    /* The following stack contains the last known state before
       encountering the initiator of an isolate sequence */
        byte[] previousStateStack = new byte[MAX_EXPLICIT_LEVEL+1];
        int  stackLast=-1;

        /* when streaming, length is rebuilt below from the paragraph boundaries found */
        if ((reorderingOptions & OPTION_STREAMING) != 0)
            length = 0;
        defaultParaLevel = (byte)(paraLevel & 1);

        if (isDefaultLevel) {
            paras_level[0] = defaultParaLevel;
            /* NOTE(review): a level (0 or 1) is stored in lastStrong, which is later
               compared with R; presumably level 1 and R share a value - confirm */
            lastStrong = defaultParaLevel;
            if (prologue != null &&                        /* there is a prologue */
                (dirProp = firstL_R_AL()) != ON) {     /* with a strong character */
                if (dirProp == L)
                    paras_level[0] = 0;             /* set the default para level */
                else
                    paras_level[0] = 1;             /* set the default para level */
                state = NOT_SEEKING_STRONG;
            } else {
                state = SEEKING_STRONG_FOR_PARA;
            }
        } else {
            paras_level[0] = paraLevel;
            state = NOT_SEEKING_STRONG;
        }
        /* count paragraphs and determine the paragraph level (P2..P3) */
        /*
         * see comment on constant fields:
         * the LEVEL_DEFAULT_XXX values are designed so that
         * their low-order bit alone yields the intended default
         */

        for (i = 0; i < originalLength; /* i is incremented in the loop */) {
            i0 = i;                     /* index of first code unit */
            uchar = UTF16.charAt(text, 0, originalLength, i);
            i += UTF16.getCharCount(uchar);
            i1 = i - 1; /* index of last code unit, gets the directional property */

            dirProp = (byte)getCustomizedClass(uchar);
            flags |= DirPropFlag(dirProp);
            dirProps[i1] = dirProp;
            if (i1 > i0) {     /* set previous code units' properties to BN */
                flags |= DirPropFlag(BN);
                do {
                    dirProps[--i1] = BN;
                } while (i1 > i0);
            }
            if (removeBidiControls && IsBidiControlChar(uchar)) {
                controlCount++;
            }
            /* strong L: may settle the paragraph level or the pending FSI */
            if (dirProp == L) {
                if (state == SEEKING_STRONG_FOR_PARA) {
                    paras_level[paraCount - 1] = 0;
                    state = NOT_SEEKING_STRONG;
                }
                else if (state == SEEKING_STRONG_FOR_FSI) {
                    /* only initiators that fit on the stack were recorded */
                    if (stackLast <= MAX_EXPLICIT_LEVEL) {
                        /* no need for next statement, already set by default */
                        /* dirProps[isolateStartStack[stackLast]] = LRI; */
                        flags |= DirPropFlag(LRI);
                    }
                    state = LOOKING_FOR_PDI;
                }
                lastStrong = L;
                continue;
            }
            /* strong R or AL: same decisions as for L, with the opposite direction */
            if (dirProp == R || dirProp == AL) {
                if (state == SEEKING_STRONG_FOR_PARA) {
                    paras_level[paraCount - 1] = 1;
                    state = NOT_SEEKING_STRONG;
                }
                else if (state == SEEKING_STRONG_FOR_FSI) {
                    if (stackLast <= MAX_EXPLICIT_LEVEL) {
                        /* turn the recorded FSI (stored as LRI) into an RLI */
                        dirProps[isolateStartStack[stackLast]] = RLI;
                        flags |= DirPropFlag(RLI);
                    }
                    state = LOOKING_FOR_PDI;
                }
                lastStrong = R;     /* AL is recorded as R here */
                if (dirProp == AL)
                    lastArabicPos = i - 1;
                continue;
            }
            /* isolate initiator: push its position and the current state */
            if (dirProp >= FSI && dirProp <= RLI) { /* FSI, LRI or RLI */
                stackLast++;
                /* beyond the stack capacity only the depth is counted */
                if (stackLast <= MAX_EXPLICIT_LEVEL) {
                    isolateStartStack[stackLast] = i - 1;
                    previousStateStack[stackLast] = state;
                }
                if (dirProp == FSI) {
                    dirProps[i-1] = LRI;    /* default if no strong char */
                    state = SEEKING_STRONG_FOR_FSI;
                }
                else
                    state = LOOKING_FOR_PDI;
                continue;
            }
            /* PDI: closes the innermost open isolate, if any */
            if (dirProp == PDI) {
                if (state == SEEKING_STRONG_FOR_FSI) {
                    /* the FSI ends without a strong character: it stays LRI */
                    if (stackLast <= MAX_EXPLICIT_LEVEL) {
                        /* no need for next statement, already set by default */
                        /* dirProps[isolateStartStack[stackLast]] = LRI; */
                        flags |= DirPropFlag(LRI);
                    }
                }
                if (stackLast >= 0) {
                    /* restore the state saved at the initiator; overflowed
                       entries were never saved, so state is left unchanged */
                    if (stackLast <= MAX_EXPLICIT_LEVEL)
                        state = previousStateStack[stackLast];
                    stackLast--;
                }
                continue;
            }
            /* paragraph separator: close the current paragraph */
            if (dirProp == B) {
                if (i < originalLength && uchar == CR && text[i] == LF) /* do nothing on the CR */
                    continue;
                paras_limit[paraCount - 1] = i;
                if (isDefaultLevelInverse && lastStrong == R)
                    paras_level[paraCount - 1] = 1;
                if ((reorderingOptions & OPTION_STREAMING) != 0) {
                /* When streaming, we only process whole paragraphs
                   thus some updates are only done on paragraph boundaries */
                   length = i;          /* i is index to next character */
                   this.controlCount = controlCount;
                }
                if (i < originalLength) {       /* B not last char in text */
                    paraCount++;
                    checkParaCount();   /* check that there is enough memory for a new para entry */
                    /* start the new paragraph with the same initial level and state
                       rules as the first one (the prologue is not consulted again) */
                    if (isDefaultLevel) {
                        paras_level[paraCount - 1] = defaultParaLevel;
                        state = SEEKING_STRONG_FOR_PARA;
                        lastStrong = defaultParaLevel;
                    } else {
                        paras_level[paraCount - 1] = paraLevel;
                        state = NOT_SEEKING_STRONG;
                    }
                    stackLast = -1;     /* isolates do not span paragraphs */
                }
                continue;
            }
        }
        /* Ignore still open isolate sequences with overflow */
        if (stackLast > MAX_EXPLICIT_LEVEL) {
            stackLast = MAX_EXPLICIT_LEVEL;
            state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
        }
        /* Resolve direction of still unresolved open FSI sequences */
        /* walk outward through the open isolates until one is still seeking a strong char */
        while (stackLast >= 0) {
            if (state == SEEKING_STRONG_FOR_FSI) {
                /* no need for next statement, already set by default */
                /* dirProps[isolateStartStack[stackLast]] = LRI; */
                flags |= DirPropFlag(LRI);
                break;
            }
            state = previousStateStack[stackLast];
            stackLast--;
        }
        /* When streaming, ignore text after the last paragraph separator */
        if ((reorderingOptions & OPTION_STREAMING) != 0) {
            if (length < originalLength)
                paraCount--;
        } else {
            paras_limit[paraCount - 1] = originalLength;
            this.controlCount = controlCount;
        }
        /* For inverse bidi, default para direction is RTL if there is
           a strong R or AL at either end of the paragraph */
        if (isDefaultLevelInverse && lastStrong == R) {
            paras_level[paraCount - 1] = 1;
        }
        if (isDefaultLevel) {
            paraLevel = paras_level[0];
        }
        /* The following is needed to resolve the text direction for default level
           paragraphs containing no strong character */
        for (i = 0; i < paraCount; i++)
            flags |= DirPropFlagLR(paras_level[i]);

        /* with orderParagraphsLTR, any paragraph separator also contributes an L flag */
        if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
            flags |= DirPropFlag(L);
        }
    }
2014
2015    /* determine the paragraph level at position index */
2016    byte GetParaLevelAt(int pindex)
2017    {
2018        if (defaultParaLevel == 0 || pindex < paras_limit[0])
2019            return paraLevel;
2020        int i;
2021        for (i = 1; i < paraCount; i++)
2022            if (pindex < paras_limit[i])
2023                break;
2024        if (i >= paraCount)
2025            i = paraCount - 1;
2026        return paras_level[i];
2027    }
2028
2029    /* Functions for handling paired brackets ----------------------------------- */
2030
2031    /* In the isoRuns array, the first entry is used for text outside of any
2032       isolate sequence.  Higher entries are used for each more deeply nested
2033       isolate sequence. isoRunLast is the index of the last used entry.  The
2034       openings array is used to note the data of opening brackets not yet
2035       matched by a closing bracket, or matched but still susceptible to change
2036       level.
2037       Each isoRun entry contains the index of the first and
2038       one-after-last openings entries for pending opening brackets it
2039       contains.  The next openings entry to use is the one-after-last of the
2040       most deeply nested isoRun entry.
2041       isoRun entries also contain their current embedding level and the last
2042       encountered strong character, since these will be needed to resolve
2043       the level of paired brackets.  */
2044
2045    private void bracketInit(BracketData bd) {
2046        bd.isoRunLast = 0;
2047        bd.isoRuns[0] = new IsoRun();
2048        bd.isoRuns[0].start = 0;
2049        bd.isoRuns[0].limit = 0;
2050        bd.isoRuns[0].level = GetParaLevelAt(0);
2051        bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(GetParaLevelAt(0) & 1);
2052        bd.isoRuns[0].contextPos = 0;
2053        bd.openings = new Opening[SIMPLE_OPENINGS_COUNT];
2054        bd.isNumbersSpecial = reorderingMode == REORDER_NUMBERS_SPECIAL ||
2055                              reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
2056    }
2057
2058    /* paragraph boundary */
2059    private void bracketProcessB(BracketData bd, byte level) {
2060        bd.isoRunLast = 0;
2061        bd.isoRuns[0].limit = 0;
2062        bd.isoRuns[0].level = level;
2063        bd.isoRuns[0].lastStrong = bd.isoRuns[0].lastBase = bd.isoRuns[0].contextDir = (byte)(level & 1);
2064        bd.isoRuns[0].contextPos = 0;
2065    }
2066
2067    /* LRE, LRO, RLE, RLO, PDF */
2068    private void bracketProcessBoundary(BracketData bd, int lastCcPos,
2069                                        byte contextLevel, byte embeddingLevel) {
2070        IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2071        if ((DirPropFlag(dirProps[lastCcPos]) & MASK_ISO) != 0) /* after an isolate */
2072            return;
2073        if (NoOverride(embeddingLevel) > NoOverride(contextLevel))  /* not a PDF */
2074            contextLevel = embeddingLevel;
2075        pLastIsoRun.limit = pLastIsoRun.start;
2076        pLastIsoRun.level = embeddingLevel;
2077        pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(contextLevel & 1);
2078        pLastIsoRun.contextPos = lastCcPos;
2079    }
2080
2081    /* LRI or RLI */
2082    private void bracketProcessLRI_RLI(BracketData bd, byte level) {
2083        IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2084        short lastLimit;
2085        pLastIsoRun.lastBase = ON;
2086        lastLimit = pLastIsoRun.limit;
2087        bd.isoRunLast++;
2088        pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2089        if (pLastIsoRun == null)
2090            pLastIsoRun = bd.isoRuns[bd.isoRunLast] = new IsoRun();
2091        pLastIsoRun.start = pLastIsoRun.limit = lastLimit;
2092        pLastIsoRun.level = level;
2093        pLastIsoRun.lastStrong = pLastIsoRun.lastBase = pLastIsoRun.contextDir = (byte)(level & 1);
2094        pLastIsoRun.contextPos = 0;
2095    }
2096
2097    /* PDI */
2098    private void bracketProcessPDI(BracketData bd) {
2099        IsoRun pLastIsoRun;
2100        bd.isoRunLast--;
2101        pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2102        pLastIsoRun.lastBase = ON;
2103    }
2104
2105    /* newly found opening bracket: create an openings entry */
2106    private void bracketAddOpening(BracketData bd, char match, int position) {
2107        IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2108        Opening pOpening;
2109        if (pLastIsoRun.limit >= bd.openings.length) {  /* no available new entry */
2110            Opening[] saveOpenings = bd.openings;
2111            int count;
2112            try {
2113                count = bd.openings.length;
2114                bd.openings = new Opening[count * 2];
2115            } catch (Exception e) {
2116                throw new OutOfMemoryError("Failed to allocate memory for openings");
2117            }
2118            System.arraycopy(saveOpenings, 0, bd.openings, 0, count);
2119        }
2120        pOpening = bd.openings[pLastIsoRun.limit];
2121        if (pOpening == null)
2122            pOpening = bd.openings[pLastIsoRun.limit]= new Opening();
2123        pOpening.position = position;
2124        pOpening.match = match;
2125        pOpening.contextDir = pLastIsoRun.contextDir;
2126        pOpening.contextPos = pLastIsoRun.contextPos;
2127        pOpening.flags = 0;
2128        pLastIsoRun.limit++;
2129    }
2130
2131    /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
2132    private void fixN0c(BracketData bd, int openingIndex, int newPropPosition, byte newProp) {
2133        /* This function calls itself recursively */
2134        IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2135        Opening qOpening;
2136        int k, openingPosition, closingPosition;
2137        for (k = openingIndex+1; k < pLastIsoRun.limit; k++) {
2138            qOpening = bd.openings[k];
2139            if (qOpening.match >= 0)    /* not an N0c match */
2140                continue;
2141            if (newPropPosition < qOpening.contextPos)
2142                break;
2143            if (newPropPosition >= qOpening.position)
2144                continue;
2145            if (newProp == qOpening.contextDir)
2146                break;
2147            openingPosition = qOpening.position;
2148            dirProps[openingPosition] = newProp;
2149            closingPosition = -(qOpening.match);
2150            dirProps[closingPosition] = newProp;
2151            qOpening.match = 0;                                 /* prevent further changes */
2152            fixN0c(bd, k, openingPosition, newProp);
2153            fixN0c(bd, k, closingPosition, newProp);
2154        }
2155    }
2156
2157    /* process closing bracket; return L or R if N0b or N0c, ON if N0d */
2158    private byte bracketProcessClosing(BracketData bd, int openIdx, int position) {
2159        IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
2160        Opening pOpening, qOpening;
2161        byte direction;
2162        boolean stable;
2163        byte newProp;
2164        pOpening = bd.openings[openIdx];
2165        direction = (byte)(pLastIsoRun.level & 1);
2166        stable = true;          /* assume stable until proved otherwise */
2167
2168        /* The stable flag is set when brackets are paired and their
2169           level is resolved and cannot be changed by what will be
2170           found later in the source string.
2171           An unstable match can occur only when applying N0c, where
2172           the resolved level depends on the preceding context, and
2173           this context may be affected by text occurring later.
2174           Example: RTL paragraph containing:  abc[(latin) HEBREW]
2175           When the closing parenthesis is encountered, it appears
2176           that N0c1 must be applied since 'abc' sets an opposite
2177           direction context and both parentheses receive level 2.
2178           However, when the closing square bracket is processed,
2179           N0b applies because of 'HEBREW' being included within the
2180           brackets, thus the square brackets are treated like R and
2181           receive level 1. However, this changes the preceding
2182           context of the opening parenthesis, and it now appears
2183           that N0c2 must be applied to the parentheses rather than
2184           N0c1. */
2185
2186            if ((direction == 0 && (pOpening.flags & FOUND_L) > 0) ||
2187                (direction == 1 && (pOpening.flags & FOUND_R) > 0)) {   /* N0b */
2188                newProp = direction;
2189            }
2190            else if ((pOpening.flags & (FOUND_L | FOUND_R)) != 0) {     /* N0c */
2191                    /* it is stable if there is no preceding text or in
2192                       conditions too complicated and not worth checking */
2193                    stable = (openIdx == pLastIsoRun.start);
2194                if (direction != pOpening.contextDir)
2195                    newProp = pOpening.contextDir;                      /* N0c1 */
2196                else
2197                    newProp = direction;                                /* N0c2 */
2198            } else {
2199            /* forget this and any brackets nested within this pair */
2200            pLastIsoRun.limit = (short)openIdx;
2201            return ON;                                                  /* N0d */
2202        }
2203        dirProps[pOpening.position] = newProp;
2204        dirProps[position] = newProp;
2205        /* Update nested N0c pairs that may be affected */
2206        fixN0c(bd, openIdx, pOpening.position, newProp);
2207        if (stable) {
2208            pLastIsoRun.limit = (short)openIdx; /* forget any brackets nested within this pair */
2209            /* remove lower located synonyms if any */
2210            while (pLastIsoRun.limit > pLastIsoRun.start &&
2211                   bd.openings[pLastIsoRun.limit - 1].position == pOpening.position)
2212                pLastIsoRun.limit--;
2213        } else {
2214            int k;
2215            pOpening.match = -position;
2216            /* neutralize lower located synonyms if any */
2217            k = openIdx - 1;
2218            while (k >= pLastIsoRun.start &&
2219                   bd.openings[k].position == pOpening.position)
2220                bd.openings[k--].match = 0;
2221            /* neutralize any unmatched opening between the current pair;
2222               this will also neutralize higher located synonyms if any */
2223            for (k = openIdx + 1; k < pLastIsoRun.limit; k++) {
2224                qOpening =bd.openings[k];
2225                if (qOpening.position >= position)
2226                    break;
2227                if (qOpening.match > 0)
2228                    qOpening.match = 0;
2229            }
2230        }
2231        return newProp;
2232    }
2233
    /*
     * Handle strong characters, digits and candidates for closing brackets.
     *
     *   bd       - bracket-matching state
     *   position - index in text[] / dirProps[] / levels[] of the character
     *
     * Outline:
     *  1) An ON character is first compared with the pending openings of the
     *     current isolating run. On a match, bracketProcessClosing() resolves
     *     the pair; unless it reports N0d (ON) the method returns right after
     *     updating the run state. Otherwise the character is registered as
     *     an opening bracket if it is one.
     *  2) The character's effective direction (newProp) is derived from the
     *     override status of its level or from its dirProp, updating the
     *     run's lastBase / lastStrong / contextDir / contextPos on the way.
     *  3) If newProp is strong, its direction is recorded in the flags of the
     *     pending openings located before this position.
     */
    private void bracketProcessChar(BracketData bd, int position) {
        IsoRun pLastIsoRun = bd.isoRuns[bd.isoRunLast];
        byte dirProp, newProp;
        byte level;
        dirProp = dirProps[position];
        if (dirProp == ON) {
            char c, match;
            int idx;
            /* First see if it is a matching closing bracket. Hopefully, this is
               more efficient than checking if it is a closing bracket at all */
            c = text[position];
            /* search from the most recently added opening down to the run's start */
            for (idx = pLastIsoRun.limit - 1; idx >= pLastIsoRun.start; idx--) {
                if (bd.openings[idx].match != c)
                    continue;
                /* We have a match */
                newProp = bracketProcessClosing(bd, idx, position);
                if(newProp == ON) {         /* N0d */
                    c = 0;          /* prevent handling as an opening */
                    break;
                }
                /* the pair was resolved to newProp: the closing bracket now
                   sets the context for whatever follows it */
                pLastIsoRun.lastBase = ON;
                pLastIsoRun.contextDir = newProp;
                pLastIsoRun.contextPos = position;
                level = levels[position];
                if ((level & LEVEL_OVERRIDE) != 0) {    /* X4, X5 */
                    short flag;
                    int i;
                    newProp = (byte)(level & 1);
                    pLastIsoRun.lastStrong = newProp;
                    flag = (short)DirPropFlag(newProp);
                    /* record the overriding direction in the openings that
                       precede the one just matched */
                    for (i = pLastIsoRun.start; i < idx; i++)
                        bd.openings[i].flags |= flag;
                    /* matching brackets are not overridden by LRO/RLO */
                    levels[position] &= ~LEVEL_OVERRIDE;
                }
                /* matching brackets are not overridden by LRO/RLO */
                levels[bd.openings[idx].position] &= ~LEVEL_OVERRIDE;
                return;
            }
            /* We get here only if the ON character is not a matching closing
               bracket or it is a case of N0d */
            /* Now see if it is an opening bracket */
            if (c != 0)
                match = (char)UCharacter.getBidiPairedBracket(c); /* get the matching char */
            else
                match = 0;  /* N0d: match == c, so the test below fails */
            if (match != c &&               /* has a matching char */
                UCharacter.getIntPropertyValue(c, UProperty.BIDI_PAIRED_BRACKET_TYPE) ==
                    /* opening bracket */         UCharacter.BidiPairedBracketType.OPEN) {
                /* special case: process synonyms
                   create an opening entry for each synonym */
                if (match == 0x232A) {      /* RIGHT-POINTING ANGLE BRACKET */
                    bracketAddOpening(bd, (char)0x3009, position);
                }
                else if (match == 0x3009) { /* RIGHT ANGLE BRACKET */
                    bracketAddOpening(bd, (char)0x232A, position);
                }
                bracketAddOpening(bd, match, position);
            }
        }
        level = levels[position];
        if ((level & LEVEL_OVERRIDE) != 0) {    /* X4, X5 */
            /* overridden: the direction comes from the parity of the level */
            newProp = (byte)(level & 1);
            /* S, WS and ON keep their own property under an override */
            if (dirProp != S && dirProp != WS && dirProp != ON)
                dirProps[position] = newProp;
            pLastIsoRun.lastBase = newProp;
            pLastIsoRun.lastStrong = newProp;
            pLastIsoRun.contextDir = newProp;
            pLastIsoRun.contextPos = position;
        }
        else if (dirProp <= R || dirProp == AL) {
            /* strong character (presumably L or R via "<= R", or AL):
               DirFromStrong() supplies the direction used as context */
            newProp = DirFromStrong(dirProp);
            pLastIsoRun.lastBase = dirProp;
            pLastIsoRun.lastStrong = dirProp;
            pLastIsoRun.contextDir = newProp;
            pLastIsoRun.contextPos = position;
        }
        else if(dirProp == EN) {
            /* European number: treatment depends on the last strong type */
            pLastIsoRun.lastBase = EN;
            if (pLastIsoRun.lastStrong == L) {
                newProp = L;                    /* W7 */
                if (!bd.isNumbersSpecial)
                    dirProps[position] = ENL;
                pLastIsoRun.contextDir = L;
                pLastIsoRun.contextPos = position;
            }
            else {
                newProp = R;                    /* N0 */
                if (pLastIsoRun.lastStrong == AL)
                    dirProps[position] = AN;    /* W2 */
                else
                    dirProps[position] = ENR;
                pLastIsoRun.contextDir = R;
                pLastIsoRun.contextPos = position;
            }
        }
        else if (dirProp == AN) {
            newProp = R;                        /* N0 */
            pLastIsoRun.lastBase = AN;
            pLastIsoRun.contextDir = R;
            pLastIsoRun.contextPos = position;
        }
        else if (dirProp == NSM) {
            /* if the last real char was ON, change NSM to ON so that it
               will stay ON even if the last real char is a bracket which
               may be changed to L or R */
            newProp = pLastIsoRun.lastBase;
            if (newProp == ON)
                dirProps[position] = newProp;
        }
        else {
            /* any other type: becomes the last base; the context is unchanged */
            newProp = dirProp;
            pLastIsoRun.lastBase = dirProp;
        }
        if (newProp <= R || newProp == AL) {
            /* strong direction: record it in the flags of every pending
               opening of this run that is located before this position */
            int i;
            short flag = (short)DirPropFlag(DirFromStrong(newProp));
            for (i = pLastIsoRun.start; i < pLastIsoRun.limit; i++)
                if (position > bd.openings[i].position)
                    bd.openings[i].flags |= flag;
        }
    }
2357
2358    /* perform (X1)..(X9) ------------------------------------------------------- */
2359
2360    /* determine if the text is mixed-directional or single-directional */
2361    private byte directionFromFlags() {
2362        /* if the text contains AN and neutrals, then some neutrals may become RTL */
2363        if (!((flags & MASK_RTL) != 0 ||
2364              ((flags & DirPropFlag(AN)) != 0 &&
2365               (flags & MASK_POSSIBLE_N) != 0))) {
2366            return LTR;
2367        } else if ((flags & MASK_LTR) == 0) {
2368            return RTL;
2369        } else {
2370            return MIXED;
2371        }
2372    }
2373
    /*
     * Resolve the explicit levels as specified by explicit embedding codes.
     * Recalculate the flags to have them reflect the real properties
     * after taking the explicit embeddings into account.
     *
     * The BiDi algorithm is designed to result in the same behavior whether embedding
     * levels are externally specified (from "styled text", supposedly the preferred
     * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
     * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
     * However, in a real implementation, the removal of these codes and their index
     * positions in the plain text is undesirable since it would result in
     * reallocated, reindexed text.
     * Instead, this implementation leaves the codes in there and just ignores them
     * in the subsequent processing.
     * In order to get the same reordering behavior, positions with a BN or a not-isolate
     * explicit embedding code just get the same level assigned as the last "real"
     * character.
     *
     * Some implementations, not this one, then overwrite some of these
     * directionality properties at "real" same-level-run boundaries by
     * L or R codes so that the resolution of weak types can be performed on the
     * entire paragraph at once instead of having to parse it once more and
     * perform that resolution on same-level-runs.
     * This limits the scope of the implicit rules in effectively
     * the same way as the run limits.
     *
     * Instead, this implementation does not modify these codes, except for
     * paired brackets whose properties (ON) may be replaced by L or R.
     * On one hand, the paragraph has to be scanned for same-level-runs, but
     * on the other hand, this saves another loop to reset these codes,
     * or saves making and modifying a copy of dirProps[].
     *
     *
     * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
     *
     *
     * Handling the stack of explicit levels (Xn):
     *
     * With the BiDi stack of explicit levels, as pushed with each
     * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
     * the explicit level must never exceed MAX_EXPLICIT_LEVEL.
     *
     * In order to have a correct push-pop semantics even in the case of overflows,
     * overflow counters and a valid isolate counter are used as described in UAX#9
     * section 3.3.2 "Explicit Levels and Directions".
     *
     * This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
     *
     * Side effects: fills levels[], may rewrite entries of dirProps[]
     * (resolved brackets, overflowing/unmatched isolate codes become WS),
     * recomputes flags when explicit codes are processed, and sets
     * isolateCount to the deepest nesting of valid isolates.
     *
     * Returns the direction as computed by directionFromFlags():
     * LTR, RTL or MIXED.
     */
    private byte resolveExplicitLevels() {
        int i = 0;
        byte dirProp;
        byte level = GetParaLevelAt(0);
        byte dirct;
        isolateCount = 0;

        /* determine if the text is mixed-directional or single-directional */
        dirct = directionFromFlags();

        /* we may not need to resolve any explicit levels */
        if (dirct != MIXED) {
            /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
            return dirct;
        }
        if (reorderingMode > REORDER_LAST_LOGICAL_TO_VISUAL) {
            /* inverse BiDi: mixed, but all characters are at the same embedding level */
            /* set all levels to the paragraph level */
            int paraIndex, start, limit;
            /* paragraph paraIndex spans [paras_limit[paraIndex-1], paras_limit[paraIndex]) */
            for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
                if (paraIndex == 0)
                    start = 0;
                else
                    start = paras_limit[paraIndex - 1];
                limit = paras_limit[paraIndex];
                level = paras_level[paraIndex];
                for (i = start; i < limit; i++)
                    levels[i] =level;
            }
            return dirct;               /* no bracket matching for inverse BiDi */
        }
        if ((flags & (MASK_EXPLICIT | MASK_ISO)) == 0) {
            /* no embeddings, set all levels to the paragraph level */
            /* we still have to perform bracket matching */
            int paraIndex, start, limit;
            BracketData bracketData = new BracketData();
            bracketInit(bracketData);
            for (paraIndex = 0; paraIndex < paraCount; paraIndex++) {
                if (paraIndex == 0)
                    start = 0;
                else
                    start = paras_limit[paraIndex-1];
                limit = paras_limit[paraIndex];
                level = paras_level[paraIndex];
                for (i = start; i < limit; i++) {
                    levels[i] = level;
                    dirProp = dirProps[i];
                    /* BN gets the level but takes no part in bracket matching */
                    if (dirProp == BN)
                        continue;
                    if (dirProp == B) {
                        /* a B that is the very last character needs no
                           bracket-state reset: nothing follows it */
                        if ((i + 1) < length) {
                            if (text[i] == CR && text[i + 1] == LF)
                                continue;   /* skip CR when followed by LF */
                            bracketProcessB(bracketData, level);
                        }
                        continue;
                    }
                    bracketProcessChar(bracketData, i);
                }
            }
            return dirct;
        }
        /* continue to perform (Xn) */

        /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
        /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
        byte embeddingLevel = level, newLevel;
        byte previousLevel = level; /* previous level for regular (not CC) characters */
        int lastCcPos = 0;          /* index of last effective LRx,RLx, PDx */

        /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
           stackLast points to its current entry. */
        short[] stack = new short[MAX_EXPLICIT_LEVEL + 2];  /* we never push anything >= MAX_EXPLICIT_LEVEL
                                                               but we need one more entry as base */
        int stackLast = 0;
        /* counters for codes that could not be pushed, and for open valid isolates */
        int overflowIsolateCount = 0;
        int overflowEmbeddingCount = 0;
        int validIsolateCount = 0;
        BracketData bracketData = new BracketData();
        bracketInit(bracketData);
        stack[0] = level;       /* initialize base entry to para level, no override, no isolate */

        /* recalculate the flags */
        flags = 0;

        for (i = 0; i < length; i++) {
            dirProp = dirProps[i];
            switch (dirProp) {
            case LRE:
            case RLE:
            case LRO:
            case RLO:
                /* (X2, X3, X4, X5) */
                /* the code itself is flagged as BN and takes the level of the
                   last regular character */
                flags |= DirPropFlag(BN);
                levels[i] = previousLevel;
                if (dirProp == LRE || dirProp == LRO)
                    /* least greater even level */
                    newLevel = (byte)((embeddingLevel+2) & ~(LEVEL_OVERRIDE | 1));
                else
                    /* least greater odd level */
                    newLevel = (byte)((NoOverride(embeddingLevel) + 1) | 1);
                if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0 &&
                                                      overflowEmbeddingCount == 0) {
                    /* valid: push the new level */
                    lastCcPos = i;
                    embeddingLevel = newLevel;
                    if (dirProp == LRO || dirProp == RLO)
                        embeddingLevel |= LEVEL_OVERRIDE;
                    stackLast++;
                    stack[stackLast] = embeddingLevel;
                    /* we don't need to set LEVEL_OVERRIDE off for LRE and RLE
                       since this has already been done for newLevel which is
                       the source for embeddingLevel.
                     */
                } else {
                    /* overflow: counted only while no isolate has overflowed */
                    if (overflowIsolateCount == 0)
                        overflowEmbeddingCount++;
                }
                break;
            case PDF:
                /* (X7) */
                flags |= DirPropFlag(BN);
                levels[i] = previousLevel;
                /* handle all the overflow cases first */
                if (overflowIsolateCount > 0) {
                    break;
                }
                if (overflowEmbeddingCount > 0) {
                    overflowEmbeddingCount--;
                    break;
                }
                /* pop only an embedding entry, and never the base entry */
                if (stackLast > 0 && stack[stackLast] < ISOLATE) {   /* not an isolate entry */
                    lastCcPos = i;
                    stackLast--;
                    embeddingLevel = (byte)stack[stackLast];
                }
                break;
            case LRI:
            case RLI:
                /* the isolate initiator itself belongs to the enclosing level */
                flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
                levels[i] = NoOverride(embeddingLevel);
                if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
                    /* level change since the last regular character */
                    bracketProcessBoundary(bracketData, lastCcPos,
                                           previousLevel, embeddingLevel);
                    flags |= DirPropFlagMultiRuns;
                }
                previousLevel = embeddingLevel;
                /* (X5a, X5b) */
                if (dirProp == LRI)
                    /* least greater even level */
                    newLevel=(byte)((embeddingLevel+2)&~(LEVEL_OVERRIDE|1));
                else
                    /* least greater odd level */
                    newLevel=(byte)((NoOverride(embeddingLevel)+1)|1);
                if (newLevel <= MAX_EXPLICIT_LEVEL && overflowIsolateCount == 0
                                                   && overflowEmbeddingCount == 0) {
                    flags |= DirPropFlag(dirProp);
                    lastCcPos = i;
                    validIsolateCount++;
                    /* isolateCount tracks the deepest valid nesting seen */
                    if (validIsolateCount > isolateCount)
                        isolateCount = validIsolateCount;
                    embeddingLevel = newLevel;
                    /* we can increment stackLast without checking because newLevel
                       will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
                    stackLast++;
                    /* adding ISOLATE marks the entry as an isolate entry */
                    stack[stackLast] = (short)(embeddingLevel + ISOLATE);
                    bracketProcessLRI_RLI(bracketData, embeddingLevel);
                } else {
                    /* make it WS so that it is handled by adjustWSLevels() */
                    dirProps[i] = WS;
                    overflowIsolateCount++;
                }
                break;
            case PDI:
                if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
                    bracketProcessBoundary(bracketData, lastCcPos,
                                           previousLevel, embeddingLevel);
                    flags |= DirPropFlagMultiRuns;
                }
                /* (X6a) */
                if (overflowIsolateCount > 0) {
                    /* closes an isolate that was never pushed */
                    overflowIsolateCount--;
                    /* make it WS so that it is handled by adjustWSLevels() */
                    dirProps[i] = WS;
                }
                else if (validIsolateCount > 0) {
                    flags |= DirPropFlag(PDI);
                    lastCcPos = i;
                    overflowEmbeddingCount = 0;
                    while (stack[stackLast] < ISOLATE)  /* pop embedding entries */
                        stackLast--;                    /* until the last isolate entry */
                    stackLast--;                        /* pop also the last isolate entry */
                    validIsolateCount--;
                    bracketProcessPDI(bracketData);
                } else
                    /* make it WS so that it is handled by adjustWSLevels() */
                    dirProps[i] = WS;
                /* the PDI takes the level now on top of the stack, with the
                   ISOLATE marker masked out */
                embeddingLevel = (byte)(stack[stackLast] & ~ISOLATE);
                flags |= DirPropFlag(ON) | DirPropFlagLR(embeddingLevel);
                previousLevel = embeddingLevel;
                levels[i] = NoOverride(embeddingLevel);
                break;
            case B:
                flags |= DirPropFlag(B);
                levels[i] = GetParaLevelAt(i);
                if ((i + 1) < length) {
                    if (text[i] == CR && text[i + 1] == LF)
                        break;          /* skip CR when followed by LF */
                    /* a new paragraph follows: reset counters and stack, and
                       restart from the level of the paragraph at i + 1 */
                    overflowEmbeddingCount = overflowIsolateCount = 0;
                    validIsolateCount = 0;
                    stackLast = 0;
                    previousLevel = embeddingLevel = GetParaLevelAt(i + 1);
                    stack[0] = embeddingLevel;   /* initialize base entry to para level, no override, no isolate */
                    bracketProcessB(bracketData, embeddingLevel);
                }
                break;
            case BN:
                /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
                /* they will get their levels set correctly in adjustWSLevels() */
                levels[i] = previousLevel;
                flags |= DirPropFlag(BN);
                break;
            default:
                /* all other types are normal characters and get the "real" level */
                if (NoOverride(embeddingLevel) != NoOverride(previousLevel)) {
                    bracketProcessBoundary(bracketData, lastCcPos,
                                           previousLevel, embeddingLevel);
                    flags |= DirPropFlagMultiRuns;
                    if ((embeddingLevel & LEVEL_OVERRIDE) != 0)
                        flags |= DirPropFlagO(embeddingLevel);
                    else
                        flags |= DirPropFlagE(embeddingLevel);
                }
                previousLevel = embeddingLevel;
                /* the stored level keeps the LEVEL_OVERRIDE bit, if any */
                levels[i] = embeddingLevel;
                bracketProcessChar(bracketData, i);
                /* the dirProp may have been changed in bracketProcessChar() */
                flags |= DirPropFlag(dirProps[i]);
                break;
            }
        }
        /* when embeddings are present, also account for the paragraph level */
        if ((flags & MASK_EMBEDDING) != 0) {
            flags |= DirPropFlagLR(paraLevel);
        }
        if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
            flags |= DirPropFlag(L);
        }
        /* again, determine if the text is mixed-directional or single-directional */
        dirct = directionFromFlags();

        return dirct;
    }
2676
2677    /**
2678     * Use a pre-specified embedding levels array:
2679     *
2680     * <p>Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
2681     * ignore all explicit codes (X9),
2682     * and check all the preset levels.
2683     *
2684     * <p>Recalculate the flags to have them reflect the real properties
2685     * after taking the explicit embeddings into account.
2686     */
2687    private byte checkExplicitLevels() {
2688        int isolateCount = 0;
2689
2690        this.flags = 0;     /* collect all directionalities in the text */
2691        this.isolateCount = 0;
2692
2693        int currentParaIndex = 0;
2694        int currentParaLimit = paras_limit[0];
2695        byte currentParaLevel = paraLevel;
2696
2697        for (int i = 0; i < length; ++i) {
2698            byte level = levels[i];
2699            byte dirProp = dirProps[i];
2700            if (dirProp == LRI || dirProp == RLI) {
2701                isolateCount++;
2702                if (isolateCount > this.isolateCount)
2703                    this.isolateCount = isolateCount;
2704            }
2705            else if (dirProp == PDI)
2706                isolateCount--;
2707            else if (dirProp == B)
2708                isolateCount = 0;
2709
2710            // optimized version of  byte currentParaLevel = GetParaLevelAt(i);
2711            if (defaultParaLevel != 0 &&
2712                    i == currentParaLimit && (currentParaIndex + 1) < paraCount) {
2713                currentParaLevel = paras_level[++currentParaIndex];
2714                currentParaLimit = paras_limit[currentParaIndex];
2715            }
2716
2717            int overrideFlag = level & LEVEL_OVERRIDE;
2718            level &= ~LEVEL_OVERRIDE;
2719            if (level < currentParaLevel || MAX_EXPLICIT_LEVEL < level) {
2720                if (level == 0) {
2721                    if (dirProp == B) {
2722                        // Paragraph separators are ok with explicit level 0.
2723                        // Prevents reordering of paragraphs.
2724                    } else {
2725                        // Treat explicit level 0 as a wildcard for the paragraph level.
2726                        // Avoid making the caller guess what the paragraph level would be.
2727                        level = currentParaLevel;
2728                        levels[i] = (byte)(level | overrideFlag);
2729                    }
2730                } else {
2731                    // 1 <= level < currentParaLevel or MAX_EXPLICIT_LEVEL < level
2732                    throw new IllegalArgumentException("level " + level +
2733                                                       " out of bounds at " + i);
2734                }
2735            }
2736            if (overrideFlag != 0) {
2737                /* keep the override flag in levels[i] but adjust the flags */
2738                flags |= DirPropFlagO(level);
2739            } else {
2740                /* set the flags */
2741                flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
2742            }
2743        }
2744        if ((flags & MASK_EMBEDDING) != 0)
2745            flags |= DirPropFlagLR(paraLevel);
2746        /* determine if the text is mixed-directional or single-directional */
2747        return directionFromFlags();
2748    }
2749
2750    /*********************************************************************/
2751    /* The Properties state machine table                                */
2752    /*********************************************************************/
2753    /*                                                                   */
2754    /* All table cells are 8 bits:                                       */
2755    /*      bits 0..4:  next state                                       */
2756    /*      bits 5..7:  action to perform (if > 0)                       */
2757    /*                                                                   */
2758    /* Cells may be of format "n" where n represents the next state      */
2759    /* (except for the rightmost column).                                */
2760    /* Cells may also be of format "_(x,y)" where x represents an action */
2761    /* to perform and y represents the next state.                       */
2762    /*                                                                   */
2763    /*********************************************************************/
2764    /* Definitions and type for properties state tables                  */
2765    /*********************************************************************/
2766    private static final int IMPTABPROPS_COLUMNS = 16;
2767    private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
2768    private static short GetStateProps(short cell) {
2769        return (short)(cell & 0x1f);
2770    }
2771    private static short GetActionProps(short cell) {
2772        return (short)(cell >> 5);
2773    }
2774
    /*
     * One entry per directional property, in the order given by the header row
     * inside the initializer. Each value is a column number in the layout used by
     * impTabProps (0=L, 1=R, 2=EN, 3=AN, 4=ON, 5=S, 6=B, 7=ES, 8=ET, 9=CS,
     * 10=BN, 11=NSM, 12=AL, 13=ENL, 14=ENR); several properties share a column,
     * e.g. WS, ON, FSI, LRI, RLI and PDI all map to column 4.
     */
    private static final short groupProp[] =          /* dirProp regrouped */
    {
        /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
            0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
    };
    /*
     * Reduced property codes. These are the values stored in the Res column of
     * impTabProps, and they number the first seven columns (L, R, EN, AN, ON, S, B)
     * of the levels state tables; IMPTABLEVELS_COLUMNS is derived from _B.
     */
    private static final short _L  = 0;
    private static final short _R  = 1;
    private static final short _EN = 2;
    private static final short _AN = 3;
    private static final short _ON = 4;
    private static final short _S  = 5;
    private static final short _B  = 6; /* reduced dirProp */
2787
2788    /*********************************************************************/
2789    /*                                                                   */
2790    /*      PROPERTIES  STATE  TABLE                                     */
2791    /*                                                                   */
2792    /* In table impTabProps,                                             */
2793    /*      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI    */
2794    /*      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF         */
2795    /*      - the Res column is the reduced property assigned to a run   */
2796    /*                                                                   */
2797    /* Action 1: process current run1, init new run1                     */
2798    /*        2: init new run2                                           */
2799    /*        3: process run1, process run2, init new run1               */
2800    /*        4: process run1, set run1=run2, init new run2              */
2801    /*                                                                   */
2802    /* Notes:                                                            */
2803    /*  1) This table is used in resolveImplicitLevels().                */
2804    /*  2) This table triggers actions when there is a change in the Bidi*/
2805    /*     property of incoming characters (action 1).                   */
2806    /*  3) Most such property sequences are processed immediately (in    */
    /*     fact, passed to processPropertySeq()).                        */
2808    /*  4) However, numbers are assembled as one sequence. This means    */
2809    /*     that undefined situations (like CS following digits, until    */
2810    /*     it is known if the next char will be a digit) are held until  */
2811    /*     following chars define them.                                  */
2812    /*     Example: digits followed by CS, then comes another CS or ON;  */
2813    /*              the digits will be processed, then the CS assigned   */
2814    /*              as the start of an ON sequence (action 3).           */
2815    /*  5) There are cases where more than one sequence must be          */
2816    /*     processed, for instance digits followed by CS followed by L:  */
2817    /*     the digits must be processed as one sequence, and the CS      */
2818    /*     must be processed as an ON sequence, all this before starting */
2819    /*     assembling chars for the opening L sequence.                  */
2820    /*                                                                   */
2821    /*                                                                   */
    /*
     * Properties state table: one row per state (0..23), 16 entries per row.
     * In the first 15 columns a cell is written either as a bare next-state
     * number (no action) or as 32*action + nextState (32+ = action 1, 64+ =
     * action 2, 96+ = action 3, 128+ = action 4); GetStateProps() and
     * GetActionProps() split the two parts. The last column (Res) holds the
     * reduced property (_L, _R, _EN, _AN, _ON, _S or _B) for the state of that row.
     */
    private static final short impTabProps[][] =
    {
/*                        L,     R,    EN,    AN,    ON,     S,     B,    ES,    ET,    CS,    BN,   NSM,    AL,   ENL,   ENR,   Res */
/* 0 Init        */ {     1,     2,     4,     5,     7,    15,    17,     7,     9,     7,     0,     7,     3,    18,    21,   _ON },
/* 1 L           */ {     1,  32+2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     1,     1,  32+3, 32+18, 32+21,    _L },
/* 2 R           */ {  32+1,     2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     2,     2,  32+3, 32+18, 32+21,    _R },
/* 3 AL          */ {  32+1,  32+2,  32+6,  32+6,  32+8, 32+16, 32+17,  32+8,  32+8,  32+8,     3,     3,     3, 32+18, 32+21,    _R },
/* 4 EN          */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17, 64+10,    11, 64+10,     4,     4,  32+3,    18,    21,   _EN },
/* 5 AN          */ {  32+1,  32+2,  32+4,     5,  32+7, 32+15, 32+17,  32+7,  32+9, 64+12,     5,     5,  32+3, 32+18, 32+21,   _AN },
/* 6 AL:EN/AN    */ {  32+1,  32+2,     6,     6,  32+8, 32+16, 32+17,  32+8,  32+8, 64+13,     6,     6,  32+3,    18,    21,   _AN },
/* 7 ON          */ {  32+1,  32+2,  32+4,  32+5,     7, 32+15, 32+17,     7, 64+14,     7,     7,     7,  32+3, 32+18, 32+21,   _ON },
/* 8 AL:ON       */ {  32+1,  32+2,  32+6,  32+6,     8, 32+16, 32+17,     8,     8,     8,     8,     8,  32+3, 32+18, 32+21,   _ON },
/* 9 ET          */ {  32+1,  32+2,     4,  32+5,     7, 32+15, 32+17,     7,     9,     7,     9,     9,  32+3,    18,    21,   _ON },
/*10 EN+ES/CS    */ {  96+1,  96+2,     4,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    10, 128+7,  96+3,    18,    21,   _EN },
/*11 EN+ET       */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17,  32+7,    11,  32+7,    11,    11,  32+3,    18,    21,   _EN },
/*12 AN+CS       */ {  96+1,  96+2,  96+4,     5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    12, 128+7,  96+3, 96+18, 96+21,   _AN },
/*13 AL:EN/AN+CS */ {  96+1,  96+2,     6,     6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8,    13, 128+8,  96+3,    18,    21,   _AN },
/*14 ON+ET       */ {  32+1,  32+2, 128+4,  32+5,     7, 32+15, 32+17,     7,    14,     7,    14,    14,  32+3,128+18,128+21,   _ON },
/*15 S           */ {  32+1,  32+2,  32+4,  32+5,  32+7,    15, 32+17,  32+7,  32+9,  32+7,    15,  32+7,  32+3, 32+18, 32+21,    _S },
/*16 AL:S        */ {  32+1,  32+2,  32+6,  32+6,  32+8,    16, 32+17,  32+8,  32+8,  32+8,    16,  32+8,  32+3, 32+18, 32+21,    _S },
/*17 B           */ {  32+1,  32+2,  32+4,  32+5,  32+7, 32+15,    17,  32+7,  32+9,  32+7,    17,  32+7,  32+3, 32+18, 32+21,    _B },
/*18 ENL         */ {  32+1,  32+2,    18,  32+5,  32+7, 32+15, 32+17, 64+19,    20, 64+19,    18,    18,  32+3,    18,    21,    _L },
/*19 ENL+ES/CS   */ {  96+1,  96+2,    18,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    19, 128+7,  96+3,    18,    21,    _L },
/*20 ENL+ET      */ {  32+1,  32+2,    18,  32+5,  32+7, 32+15, 32+17,  32+7,    20,  32+7,    20,    20,  32+3,    18,    21,    _L },
/*21 ENR         */ {  32+1,  32+2,    21,  32+5,  32+7, 32+15, 32+17, 64+22,    23, 64+22,    21,    21,  32+3,    18,    21,   _AN },
/*22 ENR+ES/CS   */ {  96+1,  96+2,    21,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    22, 128+7,  96+3,    18,    21,   _AN },
/*23 ENR+ET      */ {  32+1,  32+2,    21,  32+5,  32+7, 32+15, 32+17,  32+7,    23,  32+7,    23,    23,  32+3,    18,    21,   _AN }
    };
2850
2851    /*********************************************************************/
2852    /* The levels state machine tables                                   */
2853    /*********************************************************************/
2854    /*                                                                   */
2855    /* All table cells are 8 bits:                                       */
2856    /*      bits 0..3:  next state                                       */
2857    /*      bits 4..7:  action to perform (if > 0)                       */
2858    /*                                                                   */
2859    /* Cells may be of format "n" where n represents the next state      */
2860    /* (except for the rightmost column).                                */
    /* Cells may also be of format "0xXY" where X represents an action   */
    /* to perform and Y represents the next state (e.g. 0x14, 0x21).     */
2863    /*                                                                   */
    /* This format limits each table to 16 states and to 15 actions.     */
2865    /*                                                                   */
2866    /*********************************************************************/
2867    /* Definitions and type for levels state tables                      */
2868    /*********************************************************************/
    /* Number of entries in each row of a levels state table: columns _L.._B (0..6) plus the Res column. */
    private static final int IMPTABLEVELS_COLUMNS = _B + 2;
    /* Index of the last (Res) column in a row of a levels state table. */
    private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1;
2871    private static short GetState(byte cell) { return (short)(cell & 0x0f); }
2872    private static short GetAction(byte cell) { return (short)(cell >> 4); }
2873
2874    private static class ImpTabPair {
2875        byte[][][] imptab;
2876        short[][] impact;
2877
2878        ImpTabPair(byte[][] table1, byte[][] table2,
2879                   short[] act1, short[] act2) {
2880            imptab = new byte[][][] {table1, table2};
2881            impact = new short[][] {act1, act2};
2882        }
2883    }
2884
2885    /*********************************************************************/
2886    /*                                                                   */
2887    /*      LEVELS  STATE  TABLES                                        */
2888    /*                                                                   */
2889    /* In all levels state tables,                                       */
2890    /*      - state 0 is the initial state                               */
2891    /*      - the Res column is the increment to add to the text level   */
2892    /*        for this property sequence.                                */
2893    /*                                                                   */
2894    /* The impact arrays for each table of a pair map the local action   */
2895    /* numbers of the table to the total list of actions. For instance,  */
2896    /* action 2 in a given table corresponds to the action number which  */
2897    /* appears in entry [2] of the impact array for that table.          */
2898    /* The first entry of all impact arrays must be 0.                   */
2899    /*                                                                   */
2900    /* Action 1: init conditional sequence                               */
2901    /*        2: prepend conditional sequence to current sequence        */
2902    /*        3: set ON sequence to new level - 1                        */
2903    /*        4: init EN/AN/ON sequence                                  */
2904    /*        5: fix EN/AN/ON sequence followed by R                     */
2905    /*        6: set previous level sequence to level 2                  */
2906    /*                                                                   */
2907    /* Notes:                                                            */
2908    /*  1) These tables are used in processPropertySeq(). The input      */
2909    /*     is property sequences as determined by resolveImplicitLevels. */
2910    /*  2) Most such property sequences are processed immediately        */
2911    /*     (levels are assigned).                                        */
2912    /*  3) However, some sequences cannot be assigned a final level till */
2913    /*     one or more following sequences are received. For instance,   */
2914    /*     ON following an R sequence within an even-level paragraph.    */
2915    /*     If the following sequence is R, the ON sequence will be       */
2916    /*     assigned basic run level+1, and so will the R sequence.       */
2917    /*  4) S is generally handled like ON, since its level will be fixed */
2918    /*     to paragraph level in adjustWSLevels().                       */
2919    /*                                                                   */
2920
    /*
     * Default levels state table for even paragraph levels.
     * One row per state; columns L..B are selected by the reduced property of
     * the incoming sequence. A cell written in hex as 0xXY means "local action
     * X, next state Y"; a bare number is a next state with no action. The Res
     * column is the level increment for a sequence in the state of that row.
     */
    private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
        /*  In this table, conditional sequences receive the lower possible level
            until proven otherwise.
        */
    {
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 : init       */ {     0,     1,     0,     2,     0,     0,     0,  0 },
        /* 1 : R          */ {     0,     1,     3,     3,  0x14,  0x14,     0,  1 },
        /* 2 : AN         */ {     0,     1,     0,     2,  0x15,  0x15,     0,  2 },
        /* 3 : R+EN/AN    */ {     0,     1,     3,     3,  0x14,  0x14,     0,  2 },
        /* 4 : R+ON       */ {     0,  0x21,  0x33,  0x33,     4,     4,     0,  0 },
        /* 5 : AN+ON      */ {     0,  0x21,     0,  0x32,     5,     5,     0,  0 }
    };
2934
    /*
     * Default levels state table for odd paragraph levels.
     * Same cell layout as the other levels tables: 0xXY is "local action X,
     * next state Y", a bare number is a next state with no action, and Res is
     * the level increment for a sequence in the state of that row.
     */
    private static final byte impTabR_DEFAULT[][] = /* Odd  paragraph level */
        /*  In this table, conditional sequences receive the lower possible level
            until proven otherwise.
        */
    {
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
        /* 1 : L          */ {     1,     0,     1,     3,  0x14,  0x14,     0,  1 },
        /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
        /* 3 : L+AN       */ {     1,     0,     1,     3,     5,     5,     0,  1 },
        /* 4 : L+ON       */ {  0x21,     0,  0x21,     3,     4,     4,     0,  0 },
        /* 5 : L+AN+ON    */ {     1,     0,     1,     3,     5,     5,     0,  0 }
    };
2948
    /* Action map in which local action numbers 0..4 map to themselves. */
    private static final short[] impAct0 = {0,1,2,3,4};

    /* Pair made of the two default tables (even level first, odd level second), both using impAct0. */
    private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
            impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
2953
    /*
     * Even-paragraph-level table of the NUMBERS_SPECIAL pair. Cell layout as in
     * the other levels tables (0xXY = local action X, next state Y; Res = level
     * increment). The pair below reuses impTabR_DEFAULT as its second table.
     */
    private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
        /* In this table, conditional sequences receive the lower possible
           level until proven otherwise.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 : init       */ {     0,     2,  0x11,  0x11,     0,     0,     0,  0 },
        /* 1 : L+EN/AN    */ {     0,  0x42,     1,     1,     0,     0,     0,  0 },
        /* 2 : R          */ {     0,     2,     4,     4,  0x13,  0x13,     0,  1 },
        /* 3 : R+ON       */ {     0,  0x22,  0x34,  0x34,     3,     3,     0,  0 },
        /* 4 : R+EN/AN    */ {     0,     2,     4,     4,  0x13,  0x13,     0,  2 }
    };
    private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
            impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
2967
    /*
     * The two tables of the GROUP_NUMBERS_WITH_R pair, followed by the pair
     * itself. Cell layout as in the other levels tables (0xXY = local action X,
     * next state Y; Res = level increment). Both tables use the impAct0 map.
     */
    private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
        /* In this table, EN/AN+ON sequences receive levels as if associated with R
           until proven that there is L or sor/eor on both sides. AN is handled like EN.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 init         */ {     0,     3,  0x11,  0x11,     0,     0,     0,  0 },
        /* 1 EN/AN        */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  2 },
        /* 2 EN/AN+ON     */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,  1 },
        /* 3 R            */ {     0,     3,     5,     5,  0x14,     0,     0,  1 },
        /* 4 R+ON         */ {  0x20,     3,     5,     5,     4,  0x20,  0x20,  1 },
        /* 5 R+EN/AN      */ {     0,     3,     5,     5,  0x14,     0,     0,  2 }
    };
    private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
        /*  In this table, EN/AN+ON sequences receive levels as if associated with R
            until proven that there is L on both sides. AN is handled like EN.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 init         */ {     2,     0,     1,     1,     0,     0,     0,  0 },
        /* 1 EN/AN        */ {     2,     0,     1,     1,     0,     0,     0,  1 },
        /* 2 L            */ {     2,     0,  0x14,  0x14,  0x13,     0,     0,  1 },
        /* 3 L+ON         */ {  0x22,     0,     4,     4,     3,     0,     0,  0 },
        /* 4 L+EN/AN      */ {  0x22,     0,     4,     4,     3,     0,     0,  1 }
    };
    private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
            ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
                       impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
2994
    /*
     * The two tables of the INVERSE_NUMBERS_AS_L pair, followed by the pair
     * itself. Cell layout as in the other levels tables (0xXY = local action X,
     * next state Y; Res = level increment). Both tables use the impAct0 map.
     */
    private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
        /* This table is identical to the Default LTR table except that EN and AN
           are handled like L.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 : init       */ {     0,     1,     0,     0,     0,     0,     0,  0 },
        /* 1 : R          */ {     0,     1,     0,     0,  0x14,  0x14,     0,  1 },
        /* 2 : AN         */ {     0,     1,     0,     0,  0x15,  0x15,     0,  2 },
        /* 3 : R+EN/AN    */ {     0,     1,     0,     0,  0x14,  0x14,     0,  2 },
        /* 4 : R+ON       */ {  0x20,     1,  0x20,  0x20,     4,     4,  0x20,  1 },
        /* 5 : AN+ON      */ {  0x20,     1,  0x20,  0x20,     5,     5,  0x20,  1 }
    };
    private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
        /* This table is identical to the Default RTL table except that EN and AN
           are handled like L.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 : init       */ {     1,     0,     1,     1,     0,     0,     0,  0 },
        /* 1 : L          */ {     1,     0,     1,     1,  0x14,  0x14,     0,  1 },
        /* 2 : EN/AN      */ {     1,     0,     1,     1,     0,     0,     0,  1 },
        /* 3 : L+AN       */ {     1,     0,     1,     1,     5,     5,     0,  1 },
        /* 4 : L+ON       */ {  0x21,     0,  0x21,  0x21,     4,     4,     0,  0 },
        /* 5 : L+AN+ON    */ {     1,     0,     1,     1,     5,     5,     0,  0 }
    };
    private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
            (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
             impAct0, impAct0);
3022
    /*
     * Odd-paragraph-level table of the INVERSE_LIKE_DIRECT pair, its action map,
     * and the pair itself. Cell layout as in the other levels tables (0xXY =
     * local action X, next state Y; Res = level increment). The pair reuses
     * impTabL_DEFAULT (with impAct0) as its first table.
     */
    private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = {  /* Odd  paragraph level */
        /*  In this table, conditional sequences receive the lower possible level
            until proven otherwise.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,  0 },
        /* 1 : L          */ {     1,     0,     1,     2,  0x13,  0x13,     0,  1 },
        /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,  1 },
        /* 3 : L+ON       */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  0 },
        /* 4 : L+ON+AN    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  3 },
        /* 5 : L+AN+ON    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,  2 },
        /* 6 : L+ON+EN    */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,  1 }
    };
    /* Action map for the table above: local action 1 stays 1, local 2 maps to 13, local 3 maps to 14. */
    private static final short[] impAct1 = {0,1,13,14};
    private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
            impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
3039
    /*
     * The two tables of the INVERSE_LIKE_DIRECT_WITH_MARKS pair, their action
     * maps, and the pair itself. Cell layout as in the other levels tables
     * (0xXY = local action X, next state Y; Res = level increment).
     */
    private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
        /* The case handled in this table is (visually):  R EN L
         */
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 : init       */ {     0,  0x63,     0,     1,     0,     0,     0,  0 },
        /* 1 : L+AN       */ {     0,  0x63,     0,     1,  0x12,  0x30,     0,  4 },
        /* 2 : L+AN+ON    */ {  0x20,  0x63,  0x20,     1,     2,  0x30,  0x20,  3 },
        /* 3 : R          */ {     0,  0x63,  0x55,  0x56,  0x14,  0x30,     0,  3 },
        /* 4 : R+ON       */ {  0x30,  0x43,  0x55,  0x56,     4,  0x30,  0x30,  3 },
        /* 5 : R+EN       */ {  0x30,  0x43,     5,  0x56,  0x14,  0x30,  0x30,  4 },
        /* 6 : R+AN       */ {  0x30,  0x43,  0x55,     6,  0x14,  0x30,  0x30,  4 }
    };
    private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
        /* The cases handled in this table are (visually):  R EN L
                                                            R L AN L
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 : init       */ {  0x13,     0,     1,     1,     0,     0,     0,  0 },
        /* 1 : R+EN/AN    */ {  0x23,     0,     1,     1,     2,  0x40,     0,  1 },
        /* 2 : R+EN/AN+ON */ {  0x23,     0,     1,     1,     2,  0x40,     0,  0 },
        /* 3 : L          */ {     3,     0,     3,  0x36,  0x14,  0x40,     0,  1 },
        /* 4 : L+ON       */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  0 },
        /* 5 : L+ON+EN    */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,  1 },
        /* 6 : L+AN       */ {  0x53,  0x40,     6,     6,     4,  0x40,  0x40,  3 }
    };
    /* Action map for the L table: local actions 3, 4, 5, 6 map to 5, 6, 7, 8; 1 and 2 map to themselves. */
    private static final short[] impAct2 = {0,1,2,5,6,7,8};
    /* Action map for the R table: local actions 2, 3, 4, 5 map to 9, 10, 11, 12; 1 maps to itself. */
    private static final short[] impAct3 = {0,1,9,10,11,12};
    private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
            new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
                           impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
3070
    /*
     * Pair built entirely from tables defined for other pairs:
     * impTabL_NUMBERS_SPECIAL with impAct0, and impTabR_INVERSE_LIKE_DIRECT with impAct1.
     */
    private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
            impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
3073
    /*
     * First table of the INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS pair, followed by
     * the pair itself. Cell layout as in the other levels tables (0xXY = local
     * action X, next state Y; Res = level increment). The pair uses the impAct2
     * map for this table and reuses impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS (with
     * impAct3) as its second table.
     */
    private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
        /*  The case handled in this table is (visually):  R EN L
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B, Res */
        /* 0 : init       */ {     0,  0x62,     1,     1,     0,     0,     0,  0 },
        /* 1 : L+EN/AN    */ {     0,  0x62,     1,     1,     0,  0x30,     0,  4 },
        /* 2 : R          */ {     0,  0x62,  0x54,  0x54,  0x13,  0x30,     0,  3 },
        /* 3 : R+ON       */ {  0x30,  0x42,  0x54,  0x54,     3,  0x30,  0x30,  3 },
        /* 4 : R+EN/AN    */ {  0x30,  0x42,     4,     4,  0x13,  0x30,  0x30,  4 }
    };
    private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
            ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
                       impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct2, impAct3);
3087
3088    private static class LevState {
3089        byte[][] impTab;                /* level table pointer          */
3090        short[] impAct;                 /* action map array             */
3091        int startON;                    /* start of ON sequence         */
3092        int startL2EN;                  /* start of level 2 sequence    */
3093        int lastStrongRTL;              /* index of last found R or AL  */
3094        int runStart;                   /* start position of the run    */
3095        short state;                    /* current state                */
3096        byte runLevel;                  /* run level before implicit solving */
3097    }
3098
3099    /*------------------------------------------------------------------------*/
3100
    /* Capacity given to insertPoints.points by addPoint() when that array is still empty. */
    static final int FIRSTALLOC = 10;
3102    /*
3103     *  param pos:     position where to insert
3104     *  param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
3105     */
3106    private void addPoint(int pos, int flag)
3107    {
3108        Point point = new Point();
3109
3110        int len = insertPoints.points.length;
3111        if (len == 0) {
3112            insertPoints.points = new Point[FIRSTALLOC];
3113            len = FIRSTALLOC;
3114        }
3115        if (insertPoints.size >= len) { /* no room for new point */
3116            Point[] savePoints = insertPoints.points;
3117            insertPoints.points = new Point[len * 2];
3118            System.arraycopy(savePoints, 0, insertPoints.points, 0, len);
3119        }
3120        point.pos = pos;
3121        point.flag = flag;
3122        insertPoints.points[insertPoints.size] = point;
3123        insertPoints.size++;
3124    }
3125
3126    private void setLevelsOutsideIsolates(int start, int limit, byte level)
3127    {
3128        byte dirProp;
3129        int  isolateCount = 0, k;
3130        for (k = start; k < limit; k++) {
3131            dirProp = dirProps[k];
3132            if (dirProp == PDI)
3133                isolateCount--;
3134            if (isolateCount == 0)
3135                levels[k] = level;
3136            if (dirProp == LRI || dirProp == RLI)
3137                isolateCount++;
3138        }
3139    }
3140
3141    /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
3142
3143    /*
3144     * This implementation of the (Wn) rules applies all rules in one pass.
3145     * In order to do so, it needs a look-ahead of typically 1 character
3146     * (except for W5: sequences of ET) and keeps track of changes
3147     * in a rule Wp that affect a later Wq (p<q).
3148     *
3149     * The (Nn) and (In) rules are also performed in that same single loop,
3150     * but effectively one iteration behind for white space.
3151     *
3152     * Since all implicit rules are performed in one step, it is not necessary
3153     * to actually store the intermediate directional properties in dirProps[].
3154     */
3155
3156    private void processPropertySeq(LevState levState, short _prop,
3157            int start, int limit) {
3158        byte cell;
3159        byte[][] impTab = levState.impTab;
3160        short[] impAct = levState.impAct;
3161        short oldStateSeq,actionSeq;
3162        byte level, addLevel;
3163        int start0, k;
3164
3165        start0 = start;                 /* save original start position */
3166        oldStateSeq = levState.state;
3167        cell = impTab[oldStateSeq][_prop];
3168        levState.state = GetState(cell);        /* isolate the new state */
3169        actionSeq = impAct[GetAction(cell)];    /* isolate the action */
3170        addLevel = impTab[levState.state][IMPTABLEVELS_RES];
3171
3172        if (actionSeq != 0) {
3173            switch (actionSeq) {
3174            case 1:                     /* init ON seq */
3175                levState.startON = start0;
3176                break;
3177
3178            case 2:                     /* prepend ON seq to current seq */
3179                start = levState.startON;
3180                break;
3181
3182            case 3:                     /* EN/AN after R+ON */
3183                level = (byte)(levState.runLevel + 1);
3184                setLevelsOutsideIsolates(levState.startON, start0, level);
3185                break;
3186
3187            case 4:                     /* EN/AN before R for NUMBERS_SPECIAL */
3188                level = (byte)(levState.runLevel + 2);
3189                setLevelsOutsideIsolates(levState.startON, start0, level);
3190                break;
3191
3192            case 5:                     /* L or S after possible relevant EN/AN */
3193                /* check if we had EN after R/AL */
3194                if (levState.startL2EN >= 0) {
3195                    addPoint(levState.startL2EN, LRM_BEFORE);
3196                }
3197                levState.startL2EN = -1;  /* not within previous if since could also be -2 */
3198                /* check if we had any relevant EN/AN after R/AL */
3199                if ((insertPoints.points.length == 0) ||
3200                        (insertPoints.size <= insertPoints.confirmed)) {
3201                    /* nothing, just clean up */
3202                    levState.lastStrongRTL = -1;
3203                    /* check if we have a pending conditional segment */
3204                    level = impTab[oldStateSeq][IMPTABLEVELS_RES];
3205                    if ((level & 1) != 0 && levState.startON > 0) { /* after ON */
3206                        start = levState.startON;   /* reset to basic run level */
3207                    }
3208                    if (_prop == _S) {              /* add LRM before S */
3209                        addPoint(start0, LRM_BEFORE);
3210                        insertPoints.confirmed = insertPoints.size;
3211                    }
3212                    break;
3213                }
3214                /* reset previous RTL cont to level for LTR text */
3215                for (k = levState.lastStrongRTL + 1; k < start0; k++) {
3216                    /* reset odd level, leave runLevel+2 as is */
3217                    levels[k] = (byte)((levels[k] - 2) & ~1);
3218                }
3219                /* mark insert points as confirmed */
3220                insertPoints.confirmed = insertPoints.size;
3221                levState.lastStrongRTL = -1;
3222                if (_prop == _S) {           /* add LRM before S */
3223                    addPoint(start0, LRM_BEFORE);
3224                    insertPoints.confirmed = insertPoints.size;
3225                }
3226                break;
3227
3228            case 6:                     /* R/AL after possible relevant EN/AN */
3229                /* just clean up */
3230                if (insertPoints.points.length > 0)
3231                    /* remove all non confirmed insert points */
3232                    insertPoints.size = insertPoints.confirmed;
3233                levState.startON = -1;
3234                levState.startL2EN = -1;
3235                levState.lastStrongRTL = limit - 1;
3236                break;
3237
3238            case 7:                     /* EN/AN after R/AL + possible cont */
3239                /* check for real AN */
3240                if ((_prop == _AN) && (dirProps[start0] == AN) &&
3241                (reorderingMode != REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
3242                {
3243                    /* real AN */
3244                    if (levState.startL2EN == -1) { /* if no relevant EN already found */
3245                        /* just note the rightmost digit as a strong RTL */
3246                        levState.lastStrongRTL = limit - 1;
3247                        break;
3248                    }
3249                    if (levState.startL2EN >= 0)  { /* after EN, no AN */
3250                        addPoint(levState.startL2EN, LRM_BEFORE);
3251                        levState.startL2EN = -2;
3252                    }
3253                    /* note AN */
3254                    addPoint(start0, LRM_BEFORE);
3255                    break;
3256                }
3257                /* if first EN/AN after R/AL */
3258                if (levState.startL2EN == -1) {
3259                    levState.startL2EN = start0;
3260                }
3261                break;
3262
3263            case 8:                     /* note location of latest R/AL */
3264                levState.lastStrongRTL = limit - 1;
3265                levState.startON = -1;
3266                break;
3267
3268            case 9:                     /* L after R+ON/EN/AN */
3269                /* include possible adjacent number on the left */
3270                for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {
3271                }
3272                if (k >= 0) {
3273                    addPoint(k, RLM_BEFORE);    /* add RLM before */
3274                    insertPoints.confirmed = insertPoints.size; /* confirm it */
3275                }
3276                levState.startON = start0;
3277                break;
3278
3279            case 10:                    /* AN after L */
3280                /* AN numbers between L text on both sides may be trouble. */
3281                /* tentatively bracket with LRMs; will be confirmed if followed by L */
3282                addPoint(start0, LRM_BEFORE);   /* add LRM before */
3283                addPoint(start0, LRM_AFTER);    /* add LRM after  */
3284                break;
3285
3286            case 11:                    /* R after L+ON/EN/AN */
3287                /* false alert, infirm LRMs around previous AN */
3288                insertPoints.size=insertPoints.confirmed;
3289                if (_prop == _S) {          /* add RLM before S */
3290                    addPoint(start0, RLM_BEFORE);
3291                    insertPoints.confirmed = insertPoints.size;
3292                }
3293                break;
3294
3295            case 12:                    /* L after L+ON/AN */
3296                level = (byte)(levState.runLevel + addLevel);
3297                for (k=levState.startON; k < start0; k++) {
3298                    if (levels[k] < level) {
3299                        levels[k] = level;
3300                    }
3301                }
3302                insertPoints.confirmed = insertPoints.size;   /* confirm inserts */
3303                levState.startON = start0;
3304                break;
3305
3306            case 13:                    /* L after L+ON+EN/AN/ON */
3307                level = levState.runLevel;
3308                for (k = start0-1; k >= levState.startON; k--) {
3309                    if (levels[k] == level+3) {
3310                        while (levels[k] == level+3) {
3311                            levels[k--] -= 2;
3312                        }
3313                        while (levels[k] == level) {
3314                            k--;
3315                        }
3316                    }
3317                    if (levels[k] == level+2) {
3318                        levels[k] = level;
3319                        continue;
3320                    }
3321                    levels[k] = (byte)(level+1);
3322                }
3323                break;
3324
3325            case 14:                    /* R after L+ON+EN/AN/ON */
3326                level = (byte)(levState.runLevel+1);
3327                for (k = start0-1; k >= levState.startON; k--) {
3328                    if (levels[k] > level) {
3329                        levels[k] -= 2;
3330                    }
3331                }
3332                break;
3333
3334            default:                        /* we should never get here */
3335                throw new IllegalStateException("Internal ICU error in processPropertySeq");
3336            }
3337        }
3338        if ((addLevel) != 0 || (start < start0)) {
3339            level = (byte)(levState.runLevel + addLevel);
3340            if (start >= levState.runStart) {
3341                for (k = start; k < limit; k++) {
3342                    levels[k] = level;
3343                }
3344            } else {
3345                setLevelsOutsideIsolates(start, limit, level);
3346            }
3347        }
3348    }
3349
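    /**
     * Returns the directionality of the last strong character at the end of the prologue, if any.
     * The backward scan stops at the first B (block separator) it meets; in that case, or if
     * no strong character is found, _ON is returned.
     * Requires prologue!=null.
     */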
3354    private byte lastL_R_AL() {
3355        for (int i = prologue.length(); i > 0; ) {
3356            int uchar = prologue.codePointBefore(i);
3357            i -= Character.charCount(uchar);
3358            byte dirProp = (byte)getCustomizedClass(uchar);
3359            if (dirProp == L) {
3360                return _L;
3361            }
3362            if (dirProp == R || dirProp == AL) {
3363                return _R;
3364            }
3365            if(dirProp == B) {
3366                return _ON;
3367            }
3368        }
3369        return _ON;
3370    }
3371
3372    /**
3373     * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
3374     * Requires epilogue!=null.
3375     */
3376    private byte firstL_R_AL_EN_AN() {
3377        for (int i = 0; i < epilogue.length(); ) {
3378            int uchar = epilogue.codePointAt(i);
3379            i += Character.charCount(uchar);
3380            byte dirProp = (byte)getCustomizedClass(uchar);
3381            if (dirProp == L) {
3382                return _L;
3383            }
3384            if (dirProp == R || dirProp == AL) {
3385                return _R;
3386            }
3387            if (dirProp == EN) {
3388                return _EN;
3389            }
3390            if (dirProp == AN) {
3391                return _AN;
3392            }
3393        }
3394        return _ON;
3395    }
3396
    /**
     * Resolves the implicit levels for the characters in [start, limit).
     * <p>
     * The characters are walked from <code>start</code> to <code>limit</code>.
     * Each one is mapped through <code>groupProp</code> and fed to the
     * <code>impTabProps</code> state table, which groups the characters into
     * property sequences; every completed sequence is handed to
     * <code>processPropertySeq()</code> together with the resolved property
     * of the state it was built in.
     * <p>
     * If the run starts with a PDI, the state saved in the current
     * <code>isolates[]</code> entry is restored instead of starting afresh.
     * If the last character of the run which is not a BN or explicit code is
     * an LRI/RLI and <code>limit &lt; length</code>, the state is saved into a
     * new <code>isolates[]</code> entry instead of closing the run.
     *
     * @param start index of the first character to process
     * @param limit index following the last character to process
     * @param sor   property standing for the context before the run; it is
     *              replaced by the last strong type of the prologue when
     *              <code>start == 0</code> and the prologue contains one
     * @param eor   property standing for the context after the run; it is fed
     *              to the properties state table at position <code>limit</code>.
     *              For the closing call to <code>processPropertySeq()</code>
     *              it is replaced by the first strong type or digit of the
     *              epilogue when <code>limit == length</code> and the epilogue
     *              contains one
     * @throws IllegalStateException if the state table yields an unknown action
     */
    private void resolveImplicitLevels(int start, int limit, short sor, short eor)
    {
        byte dirProp;
        LevState levState = new LevState();
        int i, start1, start2;
        short oldStateImp, stateImp, actionImp;
        short gprop, resProp, cell;
        boolean inverseRTL;
        short nextStrongProp = R;
        int nextStrongPos = -1;

        /* check for RTL inverse Bidi mode */
        /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
         * loop on the text characters from end to start.
         * This would need a different properties state table (at least different
         * actions) and different levels state tables (maybe very similar to the
         * LTR corresponding ones).
         */
        inverseRTL=((start<lastArabicPos) && ((GetParaLevelAt(start) & 1)>0) &&
                    (reorderingMode == REORDER_INVERSE_LIKE_DIRECT  ||
                     reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
        /* initialize for property and levels state table */
        levState.startL2EN = -1;        /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
        levState.lastStrongRTL = -1;    /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
        levState.runStart = start;
        /* the level of the first character is taken as the level of the whole run */
        levState.runLevel = levels[start];
        /* the parity of the run level selects the levels state table and its actions */
        levState.impTab = impTabPair.imptab[levState.runLevel & 1];
        levState.impAct = impTabPair.impact[levState.runLevel & 1];
        /* a strong character in the prologue, if any, overrides sor */
        if (start == 0 && prologue != null) {
            byte lastStrong = lastL_R_AL();
            if (lastStrong != _ON) {
                sor = lastStrong;
            }
        }
        /* The isolates[] entries contain enough information to
           resume the bidi algorithm in the same state as it was
           when it was interrupted by an isolate sequence. */
        if (dirProps[start] == PDI) {
            levState.startON = isolates[isolateCount].startON;
            start1 = isolates[isolateCount].start1;
            stateImp = isolates[isolateCount].stateImp;
            levState.state = isolates[isolateCount].state;
            isolateCount--;
        } else {
            levState.startON = -1;
            start1 = start;
            /* a leading NSM starts the properties state machine in a state derived from sor */
            if (dirProps[start] == NSM)
                stateImp = (short)(1 + sor);
            else
                stateImp = 0;
            levState.state = 0;
            /* process an empty sequence with property sor at the start of the run */
            processPropertySeq(levState, sor, start, start);
        }
        start2 = start;                 /* to make the Java compiler happy */

        /* i == limit is one extra iteration past the last character:
           it feeds eor to the state table in order to close the run */
        for (i = start; i <= limit; i++) {
            if (i >= limit) {
                int k;
                /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
                for (k = limit - 1;
                     k > start &&
                         (DirPropFlag(dirProps[k]) & MASK_BN_EXPLICIT) != 0;
                     k--);
                dirProp = dirProps[k];
                if (dirProp == LRI || dirProp == RLI)
                    break;  /* no forced closing for sequence ending with LRI/RLI */
                gprop = eor;
            } else {
                byte prop, prop1;
                prop = dirProps[i];
                if (prop == B)
                    isolateCount = -1;  /* current isolates stack entry == none */
                if (inverseRTL) {
                    if (prop == AL) {
                        /* AL before EN does not make it AN */
                        prop = R;
                    } else if (prop == EN) {
                        /* nextStrongPos/nextStrongProp remember the result of the
                           previous look-ahead; scan again only once i has reached it */
                        if (nextStrongPos <= i) {
                            /* look for next strong char (L/R/AL) */
                            int j;
                            nextStrongProp = R;     /* set default */
                            nextStrongPos = limit;
                            for (j = i+1; j < limit; j++) {
                                prop1 = dirProps[j];
                                if (prop1 == L || prop1 == R || prop1 == AL) {
                                    nextStrongProp = prop1;
                                    nextStrongPos = j;
                                    break;
                                }
                            }
                        }
                        /* an EN whose next strong character is AL is handled as AN */
                        if (nextStrongProp == AL) {
                            prop = AN;
                        }
                    }
                }
                gprop = groupProp[prop];
            }
            oldStateImp = stateImp;
            cell = impTabProps[oldStateImp][gprop];
            stateImp = GetStateProps(cell);     /* isolate the new state */
            actionImp = GetActionProps(cell);   /* isolate the action */
            if ((i == limit) && (actionImp == 0)) {
                /* there is an unprocessed sequence if its property == eor   */
                actionImp = 1;                  /* process the last sequence */
            }
            if (actionImp != 0) {
                /* resolved property of the state which is being left */
                resProp = impTabProps[oldStateImp][IMPTABPROPS_RES];
                switch (actionImp) {
                case 1:             /* process current seq1, init new seq1 */
                    processPropertySeq(levState, resProp, start1, i);
                    start1 = i;
                    break;
                case 2:             /* init new seq2 */
                    start2 = i;
                    break;
                case 3:             /* process seq1, process seq2, init new seq1 */
                    processPropertySeq(levState, resProp, start1, start2);
                    processPropertySeq(levState, _ON, start2, i);
                    start1 = i;
                    break;
                case 4:             /* process seq1, set seq1=seq2, init new seq2 */
                    processPropertySeq(levState, resProp, start1, start2);
                    start1 = start2;
                    start2 = i;
                    break;
                default:            /* we should never get here */
                    throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
                }
            }
        }

        /* a strong character or digit in the epilogue, if any, overrides eor
           for the closing sequence processed below */
        if (limit == length && epilogue != null) {
            byte firstStrong = firstL_R_AL_EN_AN();
            if (firstStrong != _ON) {
                eor = firstStrong;
            }
        }

        /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
        for (i = limit - 1;
             i > start &&
                 (DirPropFlag(dirProps[i]) & MASK_BN_EXPLICIT) != 0;
             i--);
        dirProp = dirProps[i];
        if ((dirProp == LRI || dirProp == RLI) && limit < length) {
            /* the run is interrupted by an isolate: save the current state
               in a new isolates[] entry so that it can be resumed at a PDI */
            isolateCount++;
            if (isolates[isolateCount] == null)
                isolates[isolateCount] = new Isolate();
            isolates[isolateCount].stateImp = stateImp;
            isolates[isolateCount].state = levState.state;
            isolates[isolateCount].start1 = start1;
            isolates[isolateCount].startON = levState.startON;
        }
        else
            /* flush possible pending sequence, e.g. ON */
            processPropertySeq(levState, eor, limit, limit);
    }
3554
3555    /* perform (L1) and (X9) ---------------------------------------------------- */
3556
3557    /*
3558     * Reset the embedding levels for some non-graphic characters (L1).
3559     * This method also sets appropriate levels for BN, and
3560     * explicit embedding types that are supposed to have been removed
3561     * from the paragraph in (X9).
3562     */
3563    private void adjustWSLevels() {
3564        int i;
3565
3566        if ((flags & MASK_WS) != 0) {
3567            int flag;
3568            i = trailingWSStart;
3569            while (i > 0) {
3570                /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
3571                while (i > 0 && ((flag = DirPropFlag(dirProps[--i])) & MASK_WS) != 0) {
3572                    if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
3573                        levels[i] = 0;
3574                    } else {
3575                        levels[i] = GetParaLevelAt(i);
3576                    }
3577                }
3578
3579                /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
3580                /* here, i+1 is guaranteed to be <length */
3581                while (i > 0) {
3582                    flag = DirPropFlag(dirProps[--i]);
3583                    if ((flag & MASK_BN_EXPLICIT) != 0) {
3584                        levels[i] = levels[i + 1];
3585                    } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
3586                        levels[i] = 0;
3587                        break;
3588                    } else if ((flag & MASK_B_S) != 0){
3589                        levels[i] = GetParaLevelAt(i);
3590                        break;
3591                    }
3592                }
3593            }
3594        }
3595    }
3596
3597    /**
3598     * Set the context before a call to setPara().<p>
3599     *
3600     * setPara() computes the left-right directionality for a given piece
3601     * of text which is supplied as one of its arguments. Sometimes this piece
3602     * of text (the "main text") should be considered in context, because text
3603     * appearing before ("prologue") and/or after ("epilogue") the main text
3604     * may affect the result of this computation.<p>
3605     *
3606     * This function specifies the prologue and/or the epilogue for the next
3607     * call to setPara(). If successive calls to setPara()
3608     * all need specification of a context, setContext() must be called
3609     * before each call to setPara(). In other words, a context is not
3610     * "remembered" after the following successful call to setPara().<p>
3611     *
3612     * If a call to setPara() specifies DEFAULT_LTR or
3613     * DEFAULT_RTL as paraLevel and is preceded by a call to
3614     * setContext() which specifies a prologue, the paragraph level will
3615     * be computed taking in consideration the text in the prologue.<p>
3616     *
3617     * When setPara() is called without a previous call to
3618     * setContext, the main text is handled as if preceded and followed
3619     * by strong directional characters at the current paragraph level.
3620     * Calling setContext() with specification of a prologue will change
3621     * this behavior by handling the main text as if preceded by the last
3622     * strong character appearing in the prologue, if any.
3623     * Calling setContext() with specification of an epilogue will change
3624     * the behavior of setPara() by handling the main text as if followed
3625     * by the first strong character or digit appearing in the epilogue, if any.<p>
3626     *
3627     * Note 1: if <code>setContext</code> is called repeatedly without
3628     *         calling <code>setPara</code>, the earlier calls have no effect,
3629     *         only the last call will be remembered for the next call to
3630     *         <code>setPara</code>.<p>
3631     *
3632     * Note 2: calling <code>setContext(null, null)</code>
3633     *         cancels any previous setting of non-empty prologue or epilogue.
3634     *         The next call to <code>setPara()</code> will process no
3635     *         prologue or epilogue.<p>
3636     *
3637     * Note 3: users must be aware that even after setting the context
3638     *         before a call to setPara() to perform e.g. a logical to visual
3639     *         transformation, the resulting string may not be identical to what it
3640     *         would have been if all the text, including prologue and epilogue, had
3641     *         been processed together.<br>
3642     * Example (upper case letters represent RTL characters):<br>
3643     * &nbsp;&nbsp;prologue = "<code>abc DE</code>"<br>
3644     * &nbsp;&nbsp;epilogue = none<br>
3645     * &nbsp;&nbsp;main text = "<code>FGH xyz</code>"<br>
3646     * &nbsp;&nbsp;paraLevel = LTR<br>
3647     * &nbsp;&nbsp;display without prologue = "<code>HGF xyz</code>"
3648     *             ("HGF" is adjacent to "xyz")<br>
3649     * &nbsp;&nbsp;display with prologue = "<code>abc HGFED xyz</code>"
3650     *             ("HGF" is not adjacent to "xyz")<br>
3651     *
3652     * @param prologue is the text which precedes the text that
3653     *        will be specified in a coming call to setPara().
3654     *        If there is no prologue to consider,
3655     *        this parameter can be <code>null</code>.
3656     *
3657     * @param epilogue is the text which follows the text that
3658     *        will be specified in a coming call to setPara().
3659     *        If there is no epilogue to consider,
3660     *        this parameter can be <code>null</code>.
3661     *
3662     * @see #setPara
3663     * @stable ICU 4.8
3664     */
3665    public void setContext(String prologue, String epilogue) {
3666        this.prologue = prologue != null && prologue.length() > 0 ? prologue : null;
3667        this.epilogue = epilogue != null && epilogue.length() > 0 ? epilogue : null;
3668    }
3669
3670    private void setParaSuccess() {
3671        prologue = null;                /* forget the last context */
3672        epilogue = null;
3673        paraBidi = this;                /* mark successful setPara */
3674    }
3675
3676    int Bidi_Min(int x, int y) {
3677        return x < y ? x : y;
3678    }
3679
3680    int Bidi_Abs(int x) {
3681        return x >= 0 ? x : -x;
3682    }
3683
    /**
     * Performs setPara() as a double operation and leaves
     * <code>reorderingMode</code> set to <code>REORDER_RUNS_ONLY</code>.
     * <p>
     * Steps for non-empty text:
     * <ol>
     * <li>run <code>setPara()</code> on the text with
     *     <code>REORDER_DEFAULT</code>, keep a copy of the resulting levels
     *     and of <code>trailingWSStart</code>, and produce the reordered text
     *     and the visual map;</li>
     * <li>run <code>setPara()</code> on the reordered text with
     *     <code>REORDER_INVERSE_LIKE_DIRECT</code> and the opposite paragraph
     *     level, and compute its runs;</li>
     * <li>split every run wherever two neighbouring characters are not
     *     adjacent in the source text or have different saved levels, and
     *     express the start and level of each run in terms of the source
     *     text;</li>
     * <li>restore the text, length, levels, direction and paragraph level
     *     of the source text.</li>
     * </ol>
     * NOTE(review): presumably called from setPara() when the reordering mode
     * is REORDER_RUNS_ONLY - confirm against the caller.
     *
     * @param parmText the text to process; if it is empty, a plain
     *        <code>setPara()</code> is performed
     * @param parmParaLevel the paragraph level; for non-empty text only its
     *        lowest bit is used
     */
    void setParaRunsOnly(char[] parmText, byte parmParaLevel) {
        int[] visualMap;
        String visualText;
        int saveLength, saveTrailingWSStart;
        byte[] saveLevels;
        byte saveDirection;
        int i, j, visualStart, logicalStart,
            oldRunCount, runLength, addedRuns, insertRemove,
            start, limit, step, indexOddBit, logicalPos,
            index, index1;
        int saveOptions;

        reorderingMode = REORDER_DEFAULT;
        int parmLength = parmText.length;
        if (parmLength == 0) {
            /* empty text: nothing to reorder, note that parmParaLevel is passed as is */
            setPara(parmText, parmParaLevel, null);
            reorderingMode = REORDER_RUNS_ONLY;
            return;
        }
        /* for the first pass, OPTION_INSERT_MARKS is replaced by
         * OPTION_REMOVE_CONTROLS; the original options are restored
         * before the second pass
         */
        saveOptions = reorderingOptions;
        if ((saveOptions & OPTION_INSERT_MARKS) > 0) {
            reorderingOptions &= ~OPTION_INSERT_MARKS;
            reorderingOptions |= OPTION_REMOVE_CONTROLS;
        }
        parmParaLevel &= 1;             /* accept only 0 or 1 */
        setPara(parmText, parmParaLevel, null);
        /* we cannot access directly levels since it is not yet set if
         * direction is not MIXED
         */
        saveLevels = new byte[this.length];
        System.arraycopy(getLevels(), 0, saveLevels, 0, this.length);
        saveTrailingWSStart = trailingWSStart;

        /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
         * the visual map and the dirProps array to drive the second call
         * to setPara (but must make provision for possible removal of
         * Bidi controls).  Alternatively, only use the dirProps array via
         * customized classifier callback.
         */
        visualText = writeReordered(DO_MIRRORING);
        visualMap = getVisualMap();
        this.reorderingOptions = saveOptions;
        saveLength = this.length;
        saveDirection=this.direction;

        /* second pass: process the reordered text with the opposite paragraph level */
        this.reorderingMode = REORDER_INVERSE_LIKE_DIRECT;
        parmParaLevel ^= 1;
        setPara(visualText, parmParaLevel, null);
        BidiLine.getRuns(this);
        /* check if some runs must be split, count how many splits */
        addedRuns = 0;
        oldRunCount = this.runCount;
        visualStart = 0;
        for (i = 0; i < oldRunCount; i++, visualStart += runLength) {
            runLength = runs[i].limit - visualStart;
            if (runLength < 2) {
                continue;
            }
            logicalStart = runs[i].start;
            for (j = logicalStart+1; j < logicalStart+runLength; j++) {
                index = visualMap[j];
                index1 = visualMap[j-1];
                /* a split is needed where two neighbours are not adjacent in
                 * the source text or do not have the same saved level
                 */
                if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
                    addedRuns++;
                }
            }
        }
        if (addedRuns > 0) {
            /* make room for the additional runs and keep the existing ones */
            getRunsMemory(oldRunCount + addedRuns);
            if (runCount == 1) {
                /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
                runsMemory[0] = runs[0];
            } else {
                System.arraycopy(runs, 0, runsMemory, 0, runCount);
            }
            runs = runsMemory;
            runCount += addedRuns;
            /* make sure that each added slot holds a BidiRun object */
            for (i = oldRunCount; i < runCount; i++) {
                if (runs[i] == null) {
                    runs[i] = new BidiRun(0, 0, (byte)0);
                }
            }
        }
        /* split runs which are not consecutive in source text */
        /* the runs are handled from last to first; run i ends up at index
         * newI = i + addedRuns, where addedRuns is the number of splits
         * which have not been performed yet
         */
        int newI;
        for (i = oldRunCount-1; i >= 0; i--) {
            newI = i + addedRuns;
            runLength = i==0 ? runs[0].limit :
                               runs[i].limit - runs[i-1].limit;
            logicalStart = runs[i].start;
            indexOddBit = runs[i].level & 1;
            if (runLength < 2) {
                /* a run of a single character cannot be split: move it to
                 * its final index and map it to the source text
                 */
                if (addedRuns > 0) {
                    runs[newI].copyFrom(runs[i]);
                }
                logicalPos = visualMap[logicalStart];
                runs[newI].start = logicalPos;
                runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
                continue;
            }
            /* runs with an odd level are scanned upwards, the others downwards */
            if (indexOddBit > 0) {
                start = logicalStart;
                limit = logicalStart + runLength - 1;
                step = 1;
            } else {
                start = logicalStart + runLength - 1;
                limit = logicalStart;
                step = -1;
            }
            for (j = start; j != limit; j += step) {
                index = visualMap[j];
                index1 = visualMap[j+step];
                if ((Bidi_Abs(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
                    /* split here: the piece from start to j becomes the run
                     * at newI, and run i is shortened by the same amount
                     */
                    logicalPos = Bidi_Min(visualMap[start], index);
                    runs[newI].start = logicalPos;
                    runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
                    runs[newI].limit = runs[i].limit;
                    runs[i].limit -= Bidi_Abs(j - start) + 1;
                    /* the LRM_AFTER/RLM_AFTER bits move to the new run */
                    insertRemove = runs[i].insertRemove & (LRM_AFTER|RLM_AFTER);
                    runs[newI].insertRemove = insertRemove;
                    runs[i].insertRemove &= ~insertRemove;
                    start = j + step;
                    addedRuns--;
                    newI--;
                }
            }
            /* what is left of run i, from start to limit */
            if (addedRuns > 0) {
                runs[newI].copyFrom(runs[i]);
            }
            logicalPos = Bidi_Min(visualMap[start], visualMap[limit]);
            runs[newI].start = logicalPos;
            runs[newI].level = (byte)(saveLevels[logicalPos] ^ indexOddBit);
        }

//    cleanup1:
        /* restore initial paraLevel */
        this.paraLevel ^= 1;
//    cleanup2:
        /* restore real text */
        this.text = parmText;
        this.length = saveLength;
        this.originalLength = parmLength;
        this.direction=saveDirection;
        this.levels = saveLevels;
        this.trailingWSStart = saveTrailingWSStart;
        /* more than one run: the direction is set to MIXED */
        if (runCount > 1) {
            this.direction = MIXED;
        }
//    cleanup3:
        this.reorderingMode = REORDER_RUNS_ONLY;
    }
3836
3837    /**
3838     * Perform the Unicode Bidi algorithm. It is defined in the
3839     * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>.
3840     *
3841     * <p>This method takes a piece of plain text containing one or more paragraphs,
3842     * with or without externally specified embedding levels from <i>styled</i>
3843     * text and computes the left-right-directionality of each character.</p>
3844     *
3845     * <p>If the entire text is all of the same directionality, then
3846     * the method may not perform all the steps described by the algorithm,
3847     * i.e., some levels may not be the same as if all steps were performed.
3848     * This is not relevant for unidirectional text.<br>
3849     * For example, in pure LTR text with numbers the numbers would get
3850     * a resolved level of 2 higher than the surrounding text according to
3851     * the algorithm. This implementation may set all resolved levels to
3852     * the same value in such a case.</p>
3853     *
3854     * <p>The text can be composed of multiple paragraphs. Occurrence of a block
3855     * separator in the text terminates a paragraph, and whatever comes next starts
3856     * a new paragraph. The exception to this rule is when a Carriage Return (CR)
3857     * is followed by a Line Feed (LF). Both CR and LF are block separators, but
3858     * in that case, the pair of characters is considered as terminating the
3859     * preceding paragraph, and a new paragraph will be started by a character
3860     * coming after the LF.
3861     *
3862     * <p>Although the text is passed here as a <code>String</code>, it is
3863     * stored internally as an array of characters. Therefore the
3864     * documentation will refer to indexes of the characters in the text.
3865     *
3866     * @param text contains the text that the Bidi algorithm will be performed
3867     *        on. This text can be retrieved with <code>getText()</code> or
3868     *        <code>getTextAsString</code>.<br>
3869     *
3870     * @param paraLevel specifies the default level for the text;
3871     *        it is typically 0 (LTR) or 1 (RTL).
3872     *        If the method shall determine the paragraph level from the text,
3873     *        then <code>paraLevel</code> can be set to
3874     *        either <code>LEVEL_DEFAULT_LTR</code>
3875     *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
3876     *        paragraphs, the paragraph level shall be determined separately for
3877     *        each paragraph; if a paragraph does not include any strongly typed
3878     *        character, then the desired default is used (0 for LTR or 1 for RTL).
3879     *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
3880     *        is also valid, with odd levels indicating RTL.
3881     *
3882     * @param embeddingLevels (in) may be used to preset the embedding and override levels,
3883     *        ignoring characters like LRE and PDF in the text.
3884     *        A level overrides the directional property of its corresponding
3885     *        (same index) character if the level has the
3886     *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
3887     *        Aside from that bit, it must be
3888     *        <code>paraLevel&lt;=embeddingLevels[]&lt;=MAX_EXPLICIT_LEVEL</code>,
3889     *        except that level 0 is always allowed.
3890     *        Level 0 for a paragraph separator prevents reordering of paragraphs;
3891     *        this only works reliably if <code>LEVEL_OVERRIDE</code>
3892     *        is also set for paragraph separators.
3893     *        Level 0 for other characters is treated as a wildcard
3894     *        and is lifted up to the resolved level of the surrounding paragraph.<br><br>
3895     *        <strong>Caution: </strong>A reference to this array, not a copy
3896     *        of the levels, will be stored in the <code>Bidi</code> object;
3897     *        the <code>embeddingLevels</code>
3898     *        should not be modified to avoid unexpected results on subsequent
3899     *        Bidi operations. However, the <code>setPara()</code> and
3900     *        <code>setLine()</code> methods may modify some or all of the
3901     *        levels.<br><br>
3902     *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
3903     *        have one entry for each character in <code>text</code>.
3904     *
3905     * @throws IllegalArgumentException if the values in embeddingLevels are
3906     *         not within the allowed range
3907     *
3908     * @see #LEVEL_DEFAULT_LTR
3909     * @see #LEVEL_DEFAULT_RTL
3910     * @see #LEVEL_OVERRIDE
3911     * @see #MAX_EXPLICIT_LEVEL
3912     * @stable ICU 3.8
3913     */
3914    public void setPara(String text, byte paraLevel, byte[] embeddingLevels)
3915    {
3916        if (text == null) {
3917            setPara(new char[0], paraLevel, embeddingLevels);
3918        } else {
3919            setPara(text.toCharArray(), paraLevel, embeddingLevels);
3920        }
3921    }
3922
3923    /**
3924     * Perform the Unicode Bidi algorithm. It is defined in the
3925     * <a href="http://www.unicode.org/reports/tr9/">Unicode Standard Annex #9</a>.
3926     *
3927     * <p>This method takes a piece of plain text containing one or more paragraphs,
3928     * with or without externally specified embedding levels from <i>styled</i>
3929     * text and computes the left-right-directionality of each character.</p>
3930     *
3931     * <p>If the entire text is all of the same directionality, then
3932     * the method may not perform all the steps described by the algorithm,
3933     * i.e., some levels may not be the same as if all steps were performed.
3934     * This is not relevant for unidirectional text.<br>
3935     * For example, in pure LTR text with numbers the numbers would get
3936     * a resolved level of 2 higher than the surrounding text according to
3937     * the algorithm. This implementation may set all resolved levels to
3938     * the same value in such a case.</p>
3939     *
3940     * <p>The text can be composed of multiple paragraphs. Occurrence of a block
3941     * separator in the text terminates a paragraph, and whatever comes next starts
3942     * a new paragraph. The exception to this rule is when a Carriage Return (CR)
3943     * is followed by a Line Feed (LF). Both CR and LF are block separators, but
3944     * in that case, the pair of characters is considered as terminating the
3945     * preceding paragraph, and a new paragraph will be started by a character
3946     * coming after the LF.
3947     *
3948     * <p>The text is stored internally as an array of characters. Therefore the
3949     * documentation will refer to indexes of the characters in the text.
3950     *
3951     * @param chars contains the text that the Bidi algorithm will be performed
3952     *        on. This text can be retrieved with <code>getText()</code> or
3953     *        <code>getTextAsString</code>.<br>
3954     *
3955     * @param paraLevel specifies the default level for the text;
3956     *        it is typically 0 (LTR) or 1 (RTL).
3957     *        If the method shall determine the paragraph level from the text,
3958     *        then <code>paraLevel</code> can be set to
3959     *        either <code>LEVEL_DEFAULT_LTR</code>
3960     *        or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
3961     *        paragraphs, the paragraph level shall be determined separately for
3962     *        each paragraph; if a paragraph does not include any strongly typed
3963     *        character, then the desired default is used (0 for LTR or 1 for RTL).
3964     *        Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
3965     *        is also valid, with odd levels indicating RTL.
3966     *
3967     * @param embeddingLevels (in) may be used to preset the embedding and
3968     *        override levels, ignoring characters like LRE and PDF in the text.
3969     *        A level overrides the directional property of its corresponding
3970     *        (same index) character if the level has the
3971     *        <code>LEVEL_OVERRIDE</code> bit set.<br><br>
3972     *        Aside from that bit, it must be
3973     *        <code>paraLevel&lt;=embeddingLevels[]&lt;=MAX_EXPLICIT_LEVEL</code>,
3974     *        except that level 0 is always allowed.
3975     *        Level 0 for a paragraph separator prevents reordering of paragraphs;
3976     *        this only works reliably if <code>LEVEL_OVERRIDE</code>
3977     *        is also set for paragraph separators.
3978     *        Level 0 for other characters is treated as a wildcard
3979     *        and is lifted up to the resolved level of the surrounding paragraph.<br><br>
3980     *        <strong>Caution: </strong>A reference to this array, not a copy
3981     *        of the levels, will be stored in the <code>Bidi</code> object;
3982     *        the <code>embeddingLevels</code>
3983     *        should not be modified to avoid unexpected results on subsequent
3984     *        Bidi operations. However, the <code>setPara()</code> and
3985     *        <code>setLine()</code> methods may modify some or all of the
3986     *        levels.<br><br>
     *        <strong>Note:</strong> the <code>embeddingLevels</code> array must
     *        have one entry for each character in <code>chars</code>.
3989     *
3990     * @throws IllegalArgumentException if the values in embeddingLevels are
3991     *         not within the allowed range
3992     *
3993     * @see #LEVEL_DEFAULT_LTR
3994     * @see #LEVEL_DEFAULT_RTL
3995     * @see #LEVEL_OVERRIDE
3996     * @see #MAX_EXPLICIT_LEVEL
3997     * @stable ICU 3.8
3998     */
    public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
    {
        /*
         * Implementation outline:
         *  1. check paraLevel and replace a null text by an empty array;
         *  2. hand over to setParaRunsOnly() when in REORDER_RUNS_ONLY mode;
         *  3. reset the state left over from a previous call;
         *  4. get the directional properties, then the explicit levels
         *     (computed from the text, or taken from embeddingLevels);
         *  5. if the direction is neither LTR nor RTL, resolve the implicit
         *     levels run by run and adjust the whitespace levels;
         *  6. optionally register RLM insert points, then set resultLength.
         */

        /* check the argument values */
        /* only values below LEVEL_DEFAULT_LTR are range-checked here;
         * presumably verifyRange() rejects values outside
         * [0, MAX_EXPLICIT_LEVEL] - TODO confirm against verifyRange() */
        if (paraLevel < LEVEL_DEFAULT_LTR) {
            verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
        }
        /* a null text is treated as empty text */
        if (chars == null) {
            chars = new char[0];
        }

        /* special treatment for RUNS_ONLY mode */
        /* note: embeddingLevels is not passed on in this mode */
        if (reorderingMode == REORDER_RUNS_ONLY) {
            setParaRunsOnly(chars, paraLevel);
            return;
        }

        /* initialize the Bidi object */
        this.paraBidi = null;          /* mark unfinished setPara */
        this.text = chars;             /* the array itself is kept, not a copy */
        this.length = this.originalLength = this.resultLength = text.length;
        this.paraLevel = paraLevel;
        /* provisional direction from the low bit of paraLevel; for non-empty
         * text it is replaced below by the result of the explicit-level step */
        this.direction = (byte)(paraLevel & 1);
        this.paraCount = 1;

        /* Allocate zero-length arrays instead of setting to null here; then
         * checks for null in various places can be eliminated.
         */
        dirProps = new byte[0];
        levels = new byte[0];
        runs = new BidiRun[0];
        isGoodLogicalToVisualRunsMap = false;
        insertPoints.size = 0;          /* clean up from last call */
        insertPoints.confirmed = 0;     /* clean up from last call */

        /*
         * Save the original paraLevel if contextual; otherwise, set to 0.
         */
        defaultParaLevel = IsDefaultLevel(paraLevel) ? paraLevel : 0;

        if (length == 0) {
            /*
             * For an empty paragraph, finish with the paraLevel, the flags and
             * the direction set; dirProps, levels and runs remain the
             * zero-length arrays assigned above.
             * There is nothing more to do.
             */
            if (IsDefaultLevel(paraLevel)) {
                /* a contextual level is reduced to its low bit (0 or 1) */
                this.paraLevel &= 1;
                defaultParaLevel = 0;
            }
            flags = DirPropFlagLR(paraLevel);
            runCount = 0;
            paraCount = 0;
            setParaSuccess();
            return;
        }

        /* the runs have not been computed for this text yet */
        runCount = -1;

        /*
         * Get the directional properties,
         * the flags bit-set, and
         * determine the paragraph level if necessary.
         */
        getDirPropsMemory(length);
        dirProps = dirPropsMemory;
        getDirProps();
        /* the processed length may have changed if OPTION_STREAMING is set */
        trailingWSStart = length;  /* the levels[] will reflect the WS run */

        /* are explicit levels specified? */
        if (embeddingLevels == null) {
            /* no: determine explicit levels according to the (Xn) rules */
            getLevelsMemory(length);
            levels = levelsMemory;
            direction = resolveExplicitLevels();
        } else {
            /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */
            /* the caller's array is used directly (no copy), so the level
             * changes made further down are visible to the caller */
            levels = embeddingLevels;
            direction = checkExplicitLevels();
        }

        /* allocate isolate memory */
        /* the existing array is reused when it is already large enough */
        if (isolateCount > 0) {
            if (isolates == null || isolates.length < isolateCount)
                isolates = new Isolate[isolateCount + 3];   /* keep some reserve */
        }
        isolateCount = -1;              /* current isolates stack entry == none */

        /*
         * The steps after (X9) in the Bidi algorithm are performed only if
         * the paragraph text has mixed directionality!
         */
        switch (direction) {
        case LTR:
            /* all levels are implicitly at paraLevel (important for getLevels()) */
            trailingWSStart = 0;
            break;
        case RTL:
            /* all levels are implicitly at paraLevel (important for getLevels()) */
            trailingWSStart = 0;
            break;
        default:
            /*
             *  Choose the right implicit state table
             */
            switch(reorderingMode) {
            case REORDER_DEFAULT:
                this.impTabPair = impTab_DEFAULT;
                break;
            case REORDER_NUMBERS_SPECIAL:
                this.impTabPair = impTab_NUMBERS_SPECIAL;
                break;
            case REORDER_GROUP_NUMBERS_WITH_R:
                this.impTabPair = impTab_GROUP_NUMBERS_WITH_R;
                break;
            case REORDER_RUNS_ONLY:
                /* we should never get here */
                /* (this mode already returned near the top of the method) */
                throw new InternalError("Internal ICU error in setPara");
                /* break; */
            case REORDER_INVERSE_NUMBERS_AS_L:
                this.impTabPair = impTab_INVERSE_NUMBERS_AS_L;
                break;
            case REORDER_INVERSE_LIKE_DIRECT:
                /* a separate table is used when marks are to be inserted */
                if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
                    this.impTabPair = impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
                } else {
                    this.impTabPair = impTab_INVERSE_LIKE_DIRECT;
                }
                break;
            case REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
                /* a separate table is used when marks are to be inserted */
                if ((reorderingOptions & OPTION_INSERT_MARKS) != 0) {
                    this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
                } else {
                    this.impTabPair = impTab_INVERSE_FOR_NUMBERS_SPECIAL;
                }
                break;
            }
            /*
             * If there are no external levels specified and there
             * are no significant explicit level codes in the text,
             * then we can treat the entire paragraph as one run.
             * Otherwise, we need to perform the following rules on runs of
             * the text with the same embedding levels. (X10)
             * "Significant" explicit level codes are ones that actually
             * affect non-BN characters.
             * Examples for "insignificant" ones are empty embeddings
             * LRE-PDF, LRE-RLE-PDF-PDF, etc.
             */
            if (embeddingLevels == null && paraCount <= 1 &&
                (flags & DirPropFlagMultiRuns) == 0) {
                /* single run over the whole text; sor and eor both come from
                 * the paragraph level (at the first and at the last index) */
                resolveImplicitLevels(0, length,
                        GetLRFromLevel(GetParaLevelAt(0)),
                        GetLRFromLevel(GetParaLevelAt(length - 1)));
            } else {
                /* sor, eor: start and end types of same-level-run */
                int start, limit = 0;
                byte level, nextLevel;
                short sor, eor;

                /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
                /* this comparison uses the raw level values, whereas the loop
                 * below compares NoOverride() values; the eor computed here
                 * only serves as sor of the first run, and sor is only used
                 * when that run does not have the LEVEL_OVERRIDE bit set */
                level = GetParaLevelAt(0);
                nextLevel = levels[0];
                if (level < nextLevel) {
                    eor = GetLRFromLevel(nextLevel);
                } else {
                    eor = GetLRFromLevel(level);
                }

                /* one iteration per run of identical levels */
                do {
                    /* determine start and limit of the run (end points just behind the run) */

                    /* the values for this run's start are the same as for the previous run's end */
                    start = limit;
                    level = nextLevel;
                    if ((start > 0) && (dirProps[start - 1] == B)) {
                        /* except if this is a new paragraph, then set sor = para level */
                        sor = GetLRFromLevel(GetParaLevelAt(start));
                    } else {
                        sor = eor;
                    }

                    /* search for the limit of this run */
                    /* characters matching MASK_BN_EXPLICIT do not end a run,
                     * whatever their level is */
                    while ((++limit < length) &&
                           ((levels[limit] == level) ||
                            ((DirPropFlag(dirProps[limit]) & MASK_BN_EXPLICIT) != 0))) {}

                    /* get the correct level of the next run */
                    if (limit < length) {
                        nextLevel = levels[limit];
                    } else {
                        /* end of text: use the paragraph level at the last index */
                        nextLevel = GetParaLevelAt(length - 1);
                    }

                    /* determine eor from max(level, nextLevel); sor is last run's eor */
                    if (NoOverride(level) < NoOverride(nextLevel)) {
                        eor = GetLRFromLevel(nextLevel);
                    } else {
                        eor = GetLRFromLevel(level);
                    }

                    /* if the run consists of overridden directional types, then there
                       are no implicit types to be resolved */
                    if ((level & LEVEL_OVERRIDE) == 0) {
                        resolveImplicitLevels(start, limit, sor, eor);
                    } else {
                        /* remove the LEVEL_OVERRIDE flags */
                        /* (when levels is the caller's embeddingLevels array,
                         * this modifies the caller's array) */
                        do {
                            levels[start++] &= ~LEVEL_OVERRIDE;
                        } while (start < limit);
                    }
                } while (limit  < length);
            }

            /* reset the embedding levels for some non-graphic characters (L1), (X9) */
            adjustWSLevels();

            break;
        }
        /* add RLM for inverse Bidi with contextual orientation resolving
         * to RTL which would not round-trip otherwise
         */
        if ((defaultParaLevel > 0) &&
            ((reorderingOptions & OPTION_INSERT_MARKS) != 0) &&
            ((reorderingMode == REORDER_INVERSE_LIKE_DIRECT) ||
             (reorderingMode == REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
            int start, last;
            byte level;
            byte dirProp;
            for (int i = 0; i < paraCount; i++) {
                last = paras_limit[i] - 1;
                level = paras_level[i];
                if (level == 0)
                    continue;           /* LTR paragraph */
                start = i == 0 ? 0 : paras_limit[i - 1];
                /* scan the paragraph backwards: an L found before any R/AL
                 * gets an RLM insert point; an R/AL found first ends the
                 * scan without inserting anything */
                for (int j = last; j >= start; j--) {
                    dirProp = dirProps[j];
                    if (dirProp == L) {
                        if (j < last) {
                            /* step back over B characters at the end of
                             * the paragraph before choosing the position */
                            while (dirProps[last] == B) {
                                last--;
                            }
                        }
                        addPoint(last, RLM_BEFORE);
                        break;
                    }
                    if ((DirPropFlag(dirProp) & MASK_R_AL) != 0) {
                        break;
                    }
                }
            }
        }

        /* the result is shorter by the counted controls when they are to be
         * removed; otherwise it is longer by the number of insert points */
        if ((reorderingOptions & OPTION_REMOVE_CONTROLS) != 0) {
            resultLength -= controlCount;
        } else {
            resultLength += insertPoints.size;
        }
        setParaSuccess();
    }
4258
4259    /**
4260     * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
4261     * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
4262     * version 13,
4263     * also described in The Unicode Standard, Version 4.0 .<p>
4264     *
4265     * This method takes a paragraph of text and computes the
4266     * left-right-directionality of each character. The text should not
4267     * contain any Unicode block separators.<p>
4268     *
4269     * The RUN_DIRECTION attribute in the text, if present, determines the base
4270     * direction (left-to-right or right-to-left). If not present, the base
4271     * direction is computed using the Unicode Bidirectional Algorithm,
4272     * defaulting to left-to-right if there are no strong directional characters
4273     * in the text. This attribute, if present, must be applied to all the text
4274     * in the paragraph.<p>
4275     *
4276     * The BIDI_EMBEDDING attribute in the text, if present, represents
4277     * embedding level information. Negative values from -1 to -62 indicate
4278     * overrides at the absolute value of the level. Positive values from 1 to
4279     * 62 indicate embeddings. Where values are zero or not defined, the base
4280     * embedding level as determined by the base direction is assumed.<p>
4281     *
4282     * The NUMERIC_SHAPING attribute in the text, if present, converts European
4283     * digits to other decimal digits before running the bidi algorithm. This
4284     * attribute, if present, must be applied to all the text in the paragraph.
4285     *
4286     * If the entire text is all of the same directionality, then
4287     * the method may not perform all the steps described by the algorithm,
4288     * i.e., some levels may not be the same as if all steps were performed.
4289     * This is not relevant for unidirectional text.<br>
4290     * For example, in pure LTR text with numbers the numbers would get
4291     * a resolved level of 2 higher than the surrounding text according to
4292     * the algorithm. This implementation may set all resolved levels to
4293     * the same value in such a case.<p>
4294     *
4295     * @param paragraph a paragraph of text with optional character and
4296     *        paragraph attribute information
4297     * @stable ICU 3.8
4298     */
4299    public void setPara(AttributedCharacterIterator paragraph)
4300    {
4301        byte paraLvl;
4302        Boolean runDirection = (Boolean) paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
4303        if (runDirection == null) {
4304            paraLvl = LEVEL_DEFAULT_LTR;
4305        } else {
4306            paraLvl = (runDirection.equals(TextAttribute.RUN_DIRECTION_LTR)) ?
4307                        LTR : RTL;
4308        }
4309
4310        byte[] lvls = null;
4311        int len = paragraph.getEndIndex() - paragraph.getBeginIndex();
4312        byte[] embeddingLevels = new byte[len];
4313        char[] txt = new char[len];
4314        int i = 0;
4315        char ch = paragraph.first();
4316        while (ch != AttributedCharacterIterator.DONE) {
4317            txt[i] = ch;
4318            Integer embedding = (Integer) paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
4319            if (embedding != null) {
4320                byte level = embedding.byteValue();
4321                if (level == 0) {
4322                    /* no-op */
4323                } else if (level < 0) {
4324                    lvls = embeddingLevels;
4325                    embeddingLevels[i] = (byte)((0 - level) | LEVEL_OVERRIDE);
4326                } else {
4327                    lvls = embeddingLevels;
4328                    embeddingLevels[i] = level;
4329                }
4330            }
4331            ch = paragraph.next();
4332            ++i;
4333        }
4334
4335        NumericShaper shaper = (NumericShaper) paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
4336        if (shaper != null) {
4337            shaper.shape(txt, 0, len);
4338        }
4339        setPara(txt, paraLvl, lvls);
4340    }
4341
4342    /**
4343     * Specify whether block separators must be allocated level zero,
4344     * so that successive paragraphs will progress from left to right.
4345     * This method must be called before <code>setPara()</code>.
4346     * Paragraph separators (B) may appear in the text.  Setting them to level zero
4347     * means that all paragraph separators (including one possibly appearing
4348     * in the last text position) are kept in the reordered text after the text
4349     * that they follow in the source text.
4350     * When this feature is not enabled, a paragraph separator at the last
4351     * position of the text before reordering will go to the first position
4352     * of the reordered text when the paragraph level is odd.
4353     *
4354     * @param ordarParaLTR specifies whether paragraph separators (B) must
4355     * receive level 0, so that successive paragraphs progress from left to right.
4356     *
4357     * @see #setPara
4358     * @stable ICU 3.8
4359     */
4360    public void orderParagraphsLTR(boolean ordarParaLTR) {
4361        orderParagraphsLTR = ordarParaLTR;
4362    }
4363
4364    /**
4365     * Is this <code>Bidi</code> object set to allocate level 0 to block
4366     * separators so that successive paragraphs progress from left to right?
4367     *
4368     * @return <code>true</code> if the <code>Bidi</code> object is set to
4369     *         allocate level 0 to block separators.
4370     *
4371     * @stable ICU 3.8
4372     */
4373    public boolean isOrderParagraphsLTR() {
4374        return orderParagraphsLTR;
4375    }
4376
4377    /**
4378     * Get the directionality of the text.
4379     *
4380     * @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code>
4381     *         that indicates if the entire text
4382     *         represented by this object is unidirectional,
4383     *         and which direction, or if it is mixed-directional.
4384     *
4385     * @throws IllegalStateException if this call is not preceded by a successful
4386     *         call to <code>setPara</code> or <code>setLine</code>
4387     *
4388     * @see #LTR
4389     * @see #RTL
4390     * @see #MIXED
4391     * @stable ICU 3.8
4392     */
4393    public byte getDirection()
4394    {
4395        verifyValidParaOrLine();
4396        return direction;
4397    }
4398
4399    /**
4400     * Get the text.
4401     *
4402     * @return A <code>String</code> containing the text that the
4403     *         <code>Bidi</code> object was created for.
4404     *
4405     * @throws IllegalStateException if this call is not preceded by a successful
4406     *         call to <code>setPara</code> or <code>setLine</code>
4407     *
4408     * @see #setPara
4409     * @see #setLine
4410     * @stable ICU 3.8
4411     */
4412    public String getTextAsString()
4413    {
4414        verifyValidParaOrLine();
4415        return new String(text);
4416    }
4417
4418    /**
4419     * Get the text.
4420     *
4421     * @return A <code>char</code> array containing the text that the
4422     *         <code>Bidi</code> object was created for.
4423     *
4424     * @throws IllegalStateException if this call is not preceded by a successful
4425     *         call to <code>setPara</code> or <code>setLine</code>
4426     *
4427     * @see #setPara
4428     * @see #setLine
4429     * @stable ICU 3.8
4430     */
4431    public char[] getText()
4432    {
4433        verifyValidParaOrLine();
4434        return text;
4435    }
4436
4437    /**
4438     * Get the length of the text.
4439     *
4440     * @return The length of the text that the <code>Bidi</code> object was
4441     *         created for.
4442     *
4443     * @throws IllegalStateException if this call is not preceded by a successful
4444     *         call to <code>setPara</code> or <code>setLine</code>
4445     * @stable ICU 3.8
4446     */
4447    public int getLength()
4448    {
4449        verifyValidParaOrLine();
4450        return originalLength;
4451    }
4452
4453    /**
4454     * Get the length of the source text processed by the last call to
4455     * <code>setPara()</code>. This length may be different from the length of
4456     * the source text if option <code>OPTION_STREAMING</code> has been
4457     * set.
4458     * <br>
4459     * Note that whenever the length of the text affects the execution or the
4460     * result of a method, it is the processed length which must be considered,
4461     * except for <code>setPara</code> (which receives unprocessed source text)
4462     * and <code>getLength</code> (which returns the original length of the
4463     * source text).<br>
4464     * In particular, the processed length is the one to consider in the
4465     * following cases:
4466     * <ul>
4467     * <li>maximum value of the <code>limit</code> argument of
4468     * <code>setLine</code></li>
4469     * <li>maximum value of the <code>charIndex</code> argument of
4470     * <code>getParagraph</code></li>
4471     * <li>maximum value of the <code>charIndex</code> argument of
4472     * <code>getLevelAt</code></li>
4473     * <li>number of elements in the array returned by <code>getLevels</code>
4474     * </li>
4475     * <li>maximum value of the <code>logicalStart</code> argument of
4476     * <code>getLogicalRun</code></li>
4477     * <li>maximum value of the <code>logicalIndex</code> argument of
4478     * <code>getVisualIndex</code></li>
4479     * <li>number of elements returned by <code>getLogicalMap</code></li>
4480     * <li>length of text processed by <code>writeReordered</code></li>
4481     * </ul>
4482     *
4483     * @return The length of the part of the source text processed by
4484     *         the last call to <code>setPara</code>.
4485     *
4486     * @throws IllegalStateException if this call is not preceded by a successful
4487     *         call to <code>setPara</code> or <code>setLine</code>
4488     *
4489     * @see #setPara
4490     * @see #OPTION_STREAMING
4491     * @stable ICU 3.8
4492     */
4493    public int getProcessedLength() {
4494        verifyValidParaOrLine();
4495        return length;
4496    }
4497
4498    /**
4499     * Get the length of the reordered text resulting from the last call to
4500     * <code>setPara()</code>. This length may be different from the length
4501     * of the source text if option <code>OPTION_INSERT_MARKS</code>
4502     * or option <code>OPTION_REMOVE_CONTROLS</code> has been set.
4503     * <br>
4504     * This resulting length is the one to consider in the following cases:
4505     * <ul>
4506     * <li>maximum value of the <code>visualIndex</code> argument of
4507     * <code>getLogicalIndex</code></li>
4508     * <li>number of elements returned by <code>getVisualMap</code></li>
4509     * </ul>
4510     * Note that this length stays identical to the source text length if
4511     * Bidi marks are inserted or removed using option bits of
4512     * <code>writeReordered</code>, or if option
4513     * <code>REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
4514     *
4515     * @return The length of the reordered text resulting from
4516     *         the last call to <code>setPara</code>.
4517     *
4518     * @throws IllegalStateException if this call is not preceded by a successful
4519     *         call to <code>setPara</code> or <code>setLine</code>
4520     *
4521     * @see #setPara
4522     * @see #OPTION_INSERT_MARKS
4523     * @see #OPTION_REMOVE_CONTROLS
4524     * @see #REORDER_INVERSE_NUMBERS_AS_L
4525     * @stable ICU 3.8
4526     */
4527    public int getResultLength() {
4528        verifyValidParaOrLine();
4529        return resultLength;
4530    }
4531
4532    /* paragraphs API methods ------------------------------------------------- */
4533
4534    /**
4535     * Get the paragraph level of the text.
4536     *
4537     * @return The paragraph level. If there are multiple paragraphs, their
4538     *         level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or
4539     *         LEVEL_DEFAULT_RTL.  In that case, the level of the first paragraph
4540     *         is returned.
4541     *
4542     * @throws IllegalStateException if this call is not preceded by a successful
4543     *         call to <code>setPara</code> or <code>setLine</code>
4544     *
4545     * @see #LEVEL_DEFAULT_LTR
4546     * @see #LEVEL_DEFAULT_RTL
4547     * @see #getParagraph
4548     * @see #getParagraphByIndex
4549     * @stable ICU 3.8
4550     */
4551    public byte getParaLevel()
4552    {
4553        verifyValidParaOrLine();
4554        return paraLevel;
4555    }
4556
4557    /**
4558     * Get the number of paragraphs.
4559     *
4560     * @return The number of paragraphs.
4561     *
4562     * @throws IllegalStateException if this call is not preceded by a successful
4563     *         call to <code>setPara</code> or <code>setLine</code>
4564     * @stable ICU 3.8
4565     */
4566    public int countParagraphs()
4567    {
4568        verifyValidParaOrLine();
4569        return paraCount;
4570    }
4571
4572    /**
4573     * Get a paragraph, given the index of this paragraph.
4574     *
4575     * This method returns information about a paragraph.<p>
4576     *
4577     * @param paraIndex is the number of the paragraph, in the
4578     *        range <code>[0..countParagraphs()-1]</code>.
4579     *
4580     * @return a BidiRun object with the details of the paragraph:<br>
4581     *        <code>start</code> will receive the index of the first character
4582     *        of the paragraph in the text.<br>
4583     *        <code>limit</code> will receive the limit of the paragraph.<br>
4584     *        <code>embeddingLevel</code> will receive the level of the paragraph.
4585     *
4586     * @throws IllegalStateException if this call is not preceded by a successful
4587     *         call to <code>setPara</code> or <code>setLine</code>
4588     * @throws IllegalArgumentException if paraIndex is not in the range
4589     *        <code>[0..countParagraphs()-1]</code>
4590     *
4591     * @see com.ibm.icu.text.BidiRun
4592     * @stable ICU 3.8
4593     */
4594    public BidiRun getParagraphByIndex(int paraIndex)
4595    {
4596        verifyValidParaOrLine();
4597        verifyRange(paraIndex, 0, paraCount);
4598
4599        Bidi bidi = paraBidi;             /* get Para object if Line object */
4600        int paraStart;
4601        if (paraIndex == 0) {
4602            paraStart = 0;
4603        } else {
4604            paraStart = bidi.paras_limit[paraIndex - 1];
4605        }
4606        BidiRun bidiRun = new BidiRun();
4607        bidiRun.start = paraStart;
4608        bidiRun.limit = bidi.paras_limit[paraIndex];
4609        bidiRun.level = GetParaLevelAt(paraStart);
4610        return bidiRun;
4611    }
4612
4613    /**
4614     * Get a paragraph, given a position within the text.
4615     * This method returns information about a paragraph.<br>
4616     * Note: if the paragraph index is known, it is more efficient to
4617     * retrieve the paragraph information using getParagraphByIndex().<p>
4618     *
4619     * @param charIndex is the index of a character within the text, in the
4620     *        range <code>[0..getProcessedLength()-1]</code>.
4621     *
4622     * @return a BidiRun object with the details of the paragraph:<br>
4623     *        <code>start</code> will receive the index of the first character
4624     *        of the paragraph in the text.<br>
4625     *        <code>limit</code> will receive the limit of the paragraph.<br>
4626     *        <code>embeddingLevel</code> will receive the level of the paragraph.
4627     *
4628     * @throws IllegalStateException if this call is not preceded by a successful
4629     *         call to <code>setPara</code> or <code>setLine</code>
4630     * @throws IllegalArgumentException if charIndex is not within the legal range
4631     *
4632     * @see com.ibm.icu.text.BidiRun
4633     * @see #getParagraphByIndex
4634     * @see #getProcessedLength
4635     * @stable ICU 3.8
4636     */
4637    public BidiRun getParagraph(int charIndex)
4638    {
4639        verifyValidParaOrLine();
4640        Bidi bidi = paraBidi;             /* get Para object if Line object */
4641        verifyRange(charIndex, 0, bidi.length);
4642        int paraIndex;
4643        for (paraIndex = 0; charIndex >= bidi.paras_limit[paraIndex]; paraIndex++) {
4644        }
4645        return getParagraphByIndex(paraIndex);
4646    }
4647
4648    /**
4649     * Get the index of a paragraph, given a position within the text.<p>
4650     *
4651     * @param charIndex is the index of a character within the text, in the
4652     *        range <code>[0..getProcessedLength()-1]</code>.
4653     *
4654     * @return The index of the paragraph containing the specified position,
4655     *         starting from 0.
4656     *
4657     * @throws IllegalStateException if this call is not preceded by a successful
4658     *         call to <code>setPara</code> or <code>setLine</code>
4659     * @throws IllegalArgumentException if charIndex is not within the legal range
4660     *
4661     * @see com.ibm.icu.text.BidiRun
4662     * @see #getProcessedLength
4663     * @stable ICU 3.8
4664     */
4665    public int getParagraphIndex(int charIndex)
4666    {
4667        verifyValidParaOrLine();
4668        Bidi bidi = paraBidi;             /* get Para object if Line object */
4669        verifyRange(charIndex, 0, bidi.length);
4670        int paraIndex;
4671        for (paraIndex = 0; charIndex >= bidi.paras_limit[paraIndex]; paraIndex++) {
4672        }
4673        return paraIndex;
4674    }
4675
4676    /**
4677     * Set a custom Bidi classifier used by the UBA implementation for Bidi
4678     * class determination.
4679     *
4680     * @param classifier A new custom classifier. This can be null.
4681     *
4682     * @see #getCustomClassifier
4683     * @stable ICU 3.8
4684     */
4685    public void setCustomClassifier(BidiClassifier classifier) {
4686        this.customClassifier = classifier;
4687    }
4688
4689    /**
     * Gets the current custom classifier used for Bidi class
     * determination.
4692     *
4693     * @return An instance of class <code>BidiClassifier</code>
4694     *
4695     * @see #setCustomClassifier
4696     * @stable ICU 3.8
4697     */
4698    public BidiClassifier getCustomClassifier() {
4699        return this.customClassifier;
4700    }
4701
4702    /**
4703     * Retrieves the Bidi class for a given code point.
4704     * <p>If a <code>BidiClassifier</code> is defined and returns a value
4705     * other than <code>CLASS_DEFAULT=UCharacter.getIntPropertyMaxValue(UProperty.BIDI_CLASS)+1</code>,
4706     * that value is used; otherwise the default class determination mechanism is invoked.
4707     *
4708     * @param c The code point to get a Bidi class for.
4709     *
4710     * @return The Bidi class for the character <code>c</code> that is in effect
4711     *         for this <code>Bidi</code> instance.
4712     *
4713     * @see BidiClassifier
4714     * @stable ICU 3.8
4715     */
4716    public int getCustomizedClass(int c) {
4717        int dir;
4718
4719        if (customClassifier == null ||
4720                (dir = customClassifier.classify(c)) == Bidi.CLASS_DEFAULT) {
4721            dir = bdp.getClass(c);
4722        }
4723        if (dir >= UCharacterDirection.CHAR_DIRECTION_COUNT)
4724            dir = ON;
4725        return dir;
4726    }
4727
4728    /**
4729     * <code>setLine()</code> returns a <code>Bidi</code> object to
4730     * contain the reordering information, especially the resolved levels,
4731     * for all the characters in a line of text. This line of text is
4732     * specified by referring to a <code>Bidi</code> object representing
4733     * this information for a piece of text containing one or more paragraphs,
4734     * and by specifying a range of indexes in this text.<p>
4735     * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
4736     *
4737     * This is used after calling <code>setPara()</code>
4738     * for a piece of text, and after line-breaking on that text.
4739     * It is not necessary if each paragraph is treated as a single line.<p>
4740     *
4741     * After line-breaking, rules (L1) and (L2) for the treatment of
4742     * trailing WS and for reordering are performed on
4743     * a <code>Bidi</code> object that represents a line.<p>
4744     *
4745     * <strong>Important: </strong>the line <code>Bidi</code> object may
4746     * reference data within the global text <code>Bidi</code> object.
4747     * You should not alter the content of the global text object until
4748     * you are finished using the line object.
4749     *
4750     * @param start is the line's first index into the text.
4751     *
4752     * @param limit is just behind the line's last index into the text
4753     *        (its last index +1).
4754     *
4755     * @return a <code>Bidi</code> object that will now represent a line of the text.
4756     *
4757     * @throws IllegalStateException if this call is not preceded by a successful
4758     *         call to <code>setPara</code>
4759     * @throws IllegalArgumentException if start and limit are not in the range
4760     *         <code>0&lt;=start&lt;limit&lt;=getProcessedLength()</code>,
4761     *         or if the specified line crosses a paragraph boundary
4762     *
4763     * @see #setPara
4764     * @see #getProcessedLength
4765     * @stable ICU 3.8
4766     */
4767    public Bidi setLine(int start, int limit)
4768    {
4769        verifyValidPara();
4770        verifyRange(start, 0, limit);
4771        verifyRange(limit, 0, length+1);
4772        if (getParagraphIndex(start) != getParagraphIndex(limit - 1)) {
4773            /* the line crosses a paragraph boundary */
4774            throw new IllegalArgumentException();
4775        }
4776        return BidiLine.setLine(this, start, limit);
4777    }
4778
4779    /**
4780     * Get the level for one character.
4781     *
4782     * @param charIndex the index of a character.
4783     *
4784     * @return The level for the character at <code>charIndex</code>.
4785     *
4786     * @throws IllegalStateException if this call is not preceded by a successful
4787     *         call to <code>setPara</code> or <code>setLine</code>
4788     * @throws IllegalArgumentException if charIndex is not in the range
4789     *         <code>0&lt;=charIndex&lt;getProcessedLength()</code>
4790     *
4791     * @see #getProcessedLength
4792     * @stable ICU 3.8
4793     */
4794    public byte getLevelAt(int charIndex)
4795    {
4796        verifyValidParaOrLine();
4797        verifyRange(charIndex, 0, length);
4798        return BidiLine.getLevelAt(this, charIndex);
4799    }
4800
4801    /**
4802     * Get an array of levels for each character.<p>
4803     *
4804     * Note that this method may allocate memory under some
4805     * circumstances, unlike <code>getLevelAt()</code>.
4806     *
4807     * @return The levels array for the text,
4808     *         or <code>null</code> if an error occurs.
4809     *
4810     * @throws IllegalStateException if this call is not preceded by a successful
4811     *         call to <code>setPara</code> or <code>setLine</code>
4812     * @stable ICU 3.8
4813     */
4814    public byte[] getLevels()
4815    {
4816        verifyValidParaOrLine();
4817        if (length <= 0) {
4818            return new byte[0];
4819        }
4820        return BidiLine.getLevels(this);
4821    }
4822
4823    /**
4824     * Get a logical run.
4825     * This method returns information about a run and is used
4826     * to retrieve runs in logical order.<p>
4827     * This is especially useful for line-breaking on a paragraph.
4828     *
4829     * @param logicalPosition is a logical position within the source text.
4830     *
4831     * @return a BidiRun object filled with <code>start</code> containing
4832     *        the first character of the run, <code>limit</code> containing
4833     *        the limit of the run, and <code>embeddingLevel</code> containing
4834     *        the level of the run.
4835     *
4836     * @throws IllegalStateException if this call is not preceded by a successful
4837     *         call to <code>setPara</code> or <code>setLine</code>
4838     * @throws IllegalArgumentException if logicalPosition is not in the range
4839     *         <code>0&lt;=logicalPosition&lt;getProcessedLength()</code>
4840     *
4841     * @see com.ibm.icu.text.BidiRun
4842     * @see com.ibm.icu.text.BidiRun#getStart()
4843     * @see com.ibm.icu.text.BidiRun#getLimit()
4844     * @see com.ibm.icu.text.BidiRun#getEmbeddingLevel()
4845     *
4846     * @stable ICU 3.8
4847     */
4848    public BidiRun getLogicalRun(int logicalPosition)
4849    {
4850        verifyValidParaOrLine();
4851        verifyRange(logicalPosition, 0, length);
4852        return BidiLine.getLogicalRun(this, logicalPosition);
4853    }
4854
4855    /**
4856     * Get the number of runs.
4857     * This method may invoke the actual reordering on the
4858     * <code>Bidi</code> object, after <code>setPara()</code>
4859     * may have resolved only the levels of the text. Therefore,
4860     * <code>countRuns()</code> may have to allocate memory,
4861     * and may throw an exception if it fails to do so.
4862     *
4863     * @return The number of runs.
4864     *
4865     * @throws IllegalStateException if this call is not preceded by a successful
4866     *         call to <code>setPara</code> or <code>setLine</code>
4867     * @stable ICU 3.8
4868     */
4869    public int countRuns()
4870    {
4871        verifyValidParaOrLine();
4872        BidiLine.getRuns(this);
4873        return runCount;
4874    }
4875
4876    /**
4877     *
4878     * Get a <code>BidiRun</code> object according to its index. BidiRun methods
4879     * may be used to retrieve the run's logical start, length and level,
4880     * which can be even for an LTR run or odd for an RTL run.
4881     * In an RTL run, the character at the logical start is
4882     * visually on the right of the displayed run.
4883     * The length is the number of characters in the run.<p>
4884     * <code>countRuns()</code> is normally called
4885     * before the runs are retrieved.
4886     *
4887     * <p>
4888     *  Example:
4889     * <pre>
4890     *  Bidi bidi = new Bidi();
4891     *  String text = "abc 123 DEFG xyz";
4892     *  bidi.setPara(text, Bidi.RTL, null);
4893     *  int i, count=bidi.countRuns(), logicalStart, visualIndex=0, length;
4894     *  BidiRun run;
4895     *  for (i = 0; i &lt; count; ++i) {
4896     *      run = bidi.getVisualRun(i);
4897     *      logicalStart = run.getStart();
4898     *      length = run.getLength();
4899     *      if (Bidi.LTR == run.getEmbeddingLevel()) {
4900     *          do { // LTR
4901     *              show_char(text.charAt(logicalStart++), visualIndex++);
4902     *          } while (--length &gt; 0);
4903     *      } else {
4904     *          logicalStart += length;  // logicalLimit
4905     *          do { // RTL
4906     *              show_char(text.charAt(--logicalStart), visualIndex++);
4907     *          } while (--length &gt; 0);
4908     *      }
4909     *  }
4910     * </pre>
4911     * <p>
4912     * Note that in right-to-left runs, code like this places
4913     * second surrogates before first ones (which is generally a bad idea)
4914     * and combining characters before base characters.
4915     * <p>
4916     * Use of <code>{@link #writeReordered}</code>, optionally with the
4917     * <code>{@link #KEEP_BASE_COMBINING}</code> option, can be considered in
4918     * order to avoid these issues.
4919     *
4920     * @param runIndex is the number of the run in visual order, in the
4921     *        range <code>[0..countRuns()-1]</code>.
4922     *
4923     * @return a BidiRun object containing the details of the run. The
4924     *         directionality of the run is
4925     *         <code>LTR==0</code> or <code>RTL==1</code>,
4926     *         never <code>MIXED</code>.
4927     *
4928     * @throws IllegalStateException if this call is not preceded by a successful
4929     *         call to <code>setPara</code> or <code>setLine</code>
4930     * @throws IllegalArgumentException if <code>runIndex</code> is not in
4931     *         the range <code>0&lt;=runIndex&lt;countRuns()</code>
4932     *
4933     * @see #countRuns()
4934     * @see com.ibm.icu.text.BidiRun
4935     * @see com.ibm.icu.text.BidiRun#getStart()
4936     * @see com.ibm.icu.text.BidiRun#getLength()
4937     * @see com.ibm.icu.text.BidiRun#getEmbeddingLevel()
4938     * @stable ICU 3.8
4939     */
4940    public BidiRun getVisualRun(int runIndex)
4941    {
4942        verifyValidParaOrLine();
4943        BidiLine.getRuns(this);
4944        verifyRange(runIndex, 0, runCount);
4945        return BidiLine.getVisualRun(this, runIndex);
4946    }
4947
4948    /**
4949     * Get the visual position from a logical text position.
4950     * If such a mapping is used many times on the same
4951     * <code>Bidi</code> object, then calling
4952     * <code>getLogicalMap()</code> is more efficient.
4953     * <p>
4954     * The value returned may be <code>MAP_NOWHERE</code> if there is no
4955     * visual position because the corresponding text character is a Bidi
4956     * control removed from output by the option
4957     * <code>OPTION_REMOVE_CONTROLS</code>.
4958     * <p>
4959     * When the visual output is altered by using options of
4960     * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
4961     * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
4962     * <code>REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
4963     * be correct. It is advised to use, when possible, reordering options
4964     * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
4965     * <p>
4966     * Note that in right-to-left runs, this mapping places
4967     * second surrogates before first ones (which is generally a bad idea)
4968     * and combining characters before base characters.
4969     * Use of <code>{@link #writeReordered}</code>, optionally with the
4970     * <code>{@link #KEEP_BASE_COMBINING}</code> option can be considered instead
4971     * of using the mapping, in order to avoid these issues.
4972     *
4973     * @param logicalIndex is the index of a character in the text.
4974     *
4975     * @return The visual position of this character.
4976     *
4977     * @throws IllegalStateException if this call is not preceded by a successful
4978     *         call to <code>setPara</code> or <code>setLine</code>
4979     * @throws IllegalArgumentException if <code>logicalIndex</code> is not in
4980     *         the range <code>0&lt;=logicalIndex&lt;getProcessedLength()</code>
4981     *
4982     * @see #getLogicalMap
4983     * @see #getLogicalIndex
4984     * @see #getProcessedLength
4985     * @see #MAP_NOWHERE
4986     * @see #OPTION_REMOVE_CONTROLS
4987     * @see #writeReordered
4988     * @stable ICU 3.8
4989     */
4990    public int getVisualIndex(int logicalIndex)
4991    {
4992        verifyValidParaOrLine();
4993        verifyRange(logicalIndex, 0, length);
4994        return BidiLine.getVisualIndex(this, logicalIndex);
4995    }
4996
4997
4998    /**
4999     * Get the logical text position from a visual position.
5000     * If such a mapping is used many times on the same
5001     * <code>Bidi</code> object, then calling
5002     * <code>getVisualMap()</code> is more efficient.
5003     * <p>
5004     * The value returned may be <code>MAP_NOWHERE</code> if there is no
5005     * logical position because the corresponding text character is a Bidi
5006     * mark inserted in the output by option
5007     * <code>OPTION_INSERT_MARKS</code>.
5008     * <p>
5009     * This is the inverse method to <code>getVisualIndex()</code>.
5010     * <p>
5011     * When the visual output is altered by using options of
5012     * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
5013     * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
5014     * <code>REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
5015     * be correct. It is advised to use, when possible, reordering options
5016     * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
5017     *
5018     * @param visualIndex is the visual position of a character.
5019     *
5020     * @return The index of this character in the text.
5021     *
5022     * @throws IllegalStateException if this call is not preceded by a successful
5023     *         call to <code>setPara</code> or <code>setLine</code>
5024     * @throws IllegalArgumentException if <code>visualIndex</code> is not in
5025     *         the range <code>0&lt;=visualIndex&lt;getResultLength()</code>
5026     *
5027     * @see #getVisualMap
5028     * @see #getVisualIndex
5029     * @see #getResultLength
5030     * @see #MAP_NOWHERE
5031     * @see #OPTION_INSERT_MARKS
5032     * @see #writeReordered
5033     * @stable ICU 3.8
5034     */
5035    public int getLogicalIndex(int visualIndex)
5036    {
5037        verifyValidParaOrLine();
5038        verifyRange(visualIndex, 0, resultLength);
5039        /* we can do the trivial cases without the runs array */
5040        if (insertPoints.size == 0 && controlCount == 0) {
5041            if (direction == LTR) {
5042                return visualIndex;
5043            }
5044            else if (direction == RTL) {
5045                return length - visualIndex - 1;
5046            }
5047        }
5048        BidiLine.getRuns(this);
5049        return BidiLine.getLogicalIndex(this, visualIndex);
5050    }
5051
5052    /**
5053     * Get a logical-to-visual index map (array) for the characters in the
5054     * <code>Bidi</code> (paragraph or line) object.
5055     * <p>
5056     * Some values in the map may be <code>MAP_NOWHERE</code> if the
5057     * corresponding text characters are Bidi controls removed from the visual
5058     * output by the option <code>OPTION_REMOVE_CONTROLS</code>.
5059     * <p>
5060     * When the visual output is altered by using options of
5061     * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
5062     * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
5063     * <code>REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
5064     * be correct. It is advised to use, when possible, reordering options
5065     * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
5066     * <p>
5067     * Note that in right-to-left runs, this mapping places
5068     * second surrogates before first ones (which is generally a bad idea)
5069     * and combining characters before base characters.
5070     * Use of <code>{@link #writeReordered}</code>, optionally with the
5071     * <code>{@link #KEEP_BASE_COMBINING}</code> option can be considered instead
5072     * of using the mapping, in order to avoid these issues.
5073     *
5074     * @return an array of <code>getProcessedLength()</code>
5075     *        indexes which will reflect the reordering of the characters.<br><br>
5076     *        The index map will result in
5077     *        <code>indexMap[logicalIndex]==visualIndex</code>, where
5078     *        <code>indexMap</code> represents the returned array.
5079     *
5080     * @throws IllegalStateException if this call is not preceded by a successful
5081     *         call to <code>setPara</code> or <code>setLine</code>
5082     *
5083     * @see #getVisualMap
5084     * @see #getVisualIndex
5085     * @see #getProcessedLength
5086     * @see #MAP_NOWHERE
5087     * @see #OPTION_REMOVE_CONTROLS
5088     * @see #writeReordered
5089     * @stable ICU 3.8
5090     */
5091    public int[] getLogicalMap()
5092    {
5093        /* countRuns() checks successful call to setPara/setLine */
5094        countRuns();
5095        if (length <= 0) {
5096            return new int[0];
5097        }
5098        return BidiLine.getLogicalMap(this);
5099    }
5100
5101    /**
5102     * Get a visual-to-logical index map (array) for the characters in the
5103     * <code>Bidi</code> (paragraph or line) object.
5104     * <p>
5105     * Some values in the map may be <code>MAP_NOWHERE</code> if the
5106     * corresponding text characters are Bidi marks inserted in the visual
5107     * output by the option <code>OPTION_INSERT_MARKS</code>.
5108     * <p>
5109     * When the visual output is altered by using options of
5110     * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
5111     * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
5112     * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
5113     * be correct. It is advised to use, when possible, reordering options
5114     * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
5115     *
5116     * @return an array of <code>getResultLength()</code>
5117     *        indexes which will reflect the reordering of the characters.<br><br>
5118     *        The index map will result in
5119     *        <code>indexMap[visualIndex]==logicalIndex</code>, where
5120     *        <code>indexMap</code> represents the returned array.
5121     *
5122     * @throws IllegalStateException if this call is not preceded by a successful
5123     *         call to <code>setPara</code> or <code>setLine</code>
5124     *
5125     * @see #getLogicalMap
5126     * @see #getLogicalIndex
5127     * @see #getResultLength
5128     * @see #MAP_NOWHERE
5129     * @see #OPTION_INSERT_MARKS
5130     * @see #writeReordered
5131     * @stable ICU 3.8
5132     */
5133    public int[] getVisualMap()
5134    {
5135        /* countRuns() checks successful call to setPara/setLine */
5136        countRuns();
5137        if (resultLength <= 0) {
5138            return new int[0];
5139        }
5140        return BidiLine.getVisualMap(this);
5141    }
5142
5143    /**
5144     * This is a convenience method that does not use a <code>Bidi</code> object.
5145     * It is intended to be used for when an application has determined the levels
5146     * of objects (character sequences) and just needs to have them reordered (L2).
5147     * This is equivalent to using <code>getLogicalMap()</code> on a
5148     * <code>Bidi</code> object.
5149     *
5150     * @param levels is an array of levels that have been determined by
5151     *        the application.
5152     *
5153     * @return an array of <code>levels.length</code>
5154     *        indexes which will reflect the reordering of the characters.<p>
5155     *        The index map will result in
5156     *        <code>indexMap[logicalIndex]==visualIndex</code>, where
5157     *        <code>indexMap</code> represents the returned array.
5158     *
5159     * @stable ICU 3.8
5160     */
5161    public static int[] reorderLogical(byte[] levels)
5162    {
5163        return BidiLine.reorderLogical(levels);
5164    }
5165
5166    /**
5167     * This is a convenience method that does not use a <code>Bidi</code> object.
5168     * It is intended to be used for when an application has determined the levels
5169     * of objects (character sequences) and just needs to have them reordered (L2).
5170     * This is equivalent to using <code>getVisualMap()</code> on a
5171     * <code>Bidi</code> object.
5172     *
5173     * @param levels is an array of levels that have been determined by
5174     *        the application.
5175     *
5176     * @return an array of <code>levels.length</code>
5177     *        indexes which will reflect the reordering of the characters.<p>
5178     *        The index map will result in
5179     *        <code>indexMap[visualIndex]==logicalIndex</code>, where
5180     *        <code>indexMap</code> represents the returned array.
5181     *
5182     * @stable ICU 3.8
5183     */
5184    public static int[] reorderVisual(byte[] levels)
5185    {
5186        return BidiLine.reorderVisual(levels);
5187    }
5188
5189    /**
5190     * Invert an index map.
5191     * The index mapping of the argument map is inverted and returned as
5192     * an array of indexes that we will call the inverse map.
5193     *
5194     * @param srcMap is an array whose elements define the original mapping
5195     * from a source array to a destination array.
5196     * Some elements of the source array may have no mapping in the
5197     * destination array. In that case, their value will be
5198     * the special value <code>MAP_NOWHERE</code>.
5199     * All elements must be &gt;=0 or equal to <code>MAP_NOWHERE</code>.
5200     * Some elements in the source map may have a value greater than the
5201     * srcMap.length if the destination array has more elements than the
5202     * source array.
5203     * There must be no duplicate indexes (two or more elements with the
5204     * same value except <code>MAP_NOWHERE</code>).
5205     *
5206     * @return an array representing the inverse map.
5207     *         This array has a number of elements equal to 1 + the highest
5208     *         value in <code>srcMap</code>.
5209     *         For elements of the result array which have no matching elements
5210     *         in the source array, the corresponding elements in the inverse
5211     *         map will receive a value equal to <code>MAP_NOWHERE</code>.
5212     *         If element with index i in <code>srcMap</code> has a value k different
5213     *         from <code>MAP_NOWHERE</code>, this means that element i of
5214     *         the source array maps to element k in the destination array.
5215     *         The inverse map will have value i in its k-th element.
5216     *         For all elements of the destination array which do not map to
5217     *         an element in the source array, the corresponding element in the
5218     *         inverse map will have a value equal to <code>MAP_NOWHERE</code>.
5219     *
5220     * @see #MAP_NOWHERE
5221     * @stable ICU 3.8
5222     */
5223    public static int[] invertMap(int[] srcMap)
5224    {
5225        if (srcMap == null) {
5226            return null;
5227        } else {
5228            return BidiLine.invertMap(srcMap);
5229        }
5230    }
5231
    /*
     * Fields and methods for compatibility with java.text.Bidi (Sun implementation)
     */
5235
    /**
     * Constant indicating base direction is left-to-right.
     * It has the same value as <code>LTR</code> and is one of the values
     * accepted as the <code>flags</code> argument of the constructors.
     * @stable ICU 3.8
     */
    public static final int DIRECTION_LEFT_TO_RIGHT = LTR;

    /**
     * Constant indicating base direction is right-to-left.
     * It has the same value as <code>RTL</code> and is one of the values
     * accepted as the <code>flags</code> argument of the constructors.
     * @stable ICU 3.8
     */
    public static final int DIRECTION_RIGHT_TO_LEFT = RTL;

    /**
     * Constant indicating that the base direction depends on the first strong
     * directional character in the text according to the Unicode Bidirectional
     * Algorithm. If no strong directional character is present, the base
     * direction is left-to-right.
     * It has the same value as <code>LEVEL_DEFAULT_LTR</code>.
     * @stable ICU 3.8
     */
    public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = LEVEL_DEFAULT_LTR;

    /**
     * Constant indicating that the base direction depends on the first strong
     * directional character in the text according to the Unicode Bidirectional
     * Algorithm. If no strong directional character is present, the base
     * direction is right-to-left.
     * It has the same value as <code>LEVEL_DEFAULT_RTL</code>.
     * @stable ICU 3.8
     */
    public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = LEVEL_DEFAULT_RTL;
5265
5266    /**
5267     * Create Bidi from the given paragraph of text and base direction.
5268     *
5269     * @param paragraph a paragraph of text
5270     * @param flags a collection of flags that control the algorithm. The
5271     *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
5272     *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
5273     *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
5274     * @see #DIRECTION_LEFT_TO_RIGHT
5275     * @see #DIRECTION_RIGHT_TO_LEFT
5276     * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
5277     * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
5278     * @stable ICU 3.8
5279     */
5280    public Bidi(String paragraph, int flags)
5281    {
5282        this(paragraph.toCharArray(), 0, null, 0, paragraph.length(), flags);
5283    }
5284
5285    /**
5286     * Create Bidi from the given paragraph of text.<p>
5287     *
5288     * The RUN_DIRECTION attribute in the text, if present, determines the base
5289     * direction (left-to-right or right-to-left). If not present, the base
5290     * direction is computed using the Unicode Bidirectional Algorithm,
5291     * defaulting to left-to-right if there are no strong directional characters
5292     * in the text. This attribute, if present, must be applied to all the text
5293     * in the paragraph.<p>
5294     *
5295     * The BIDI_EMBEDDING attribute in the text, if present, represents
5296     * embedding level information. Negative values from -1 to -62 indicate
5297     * overrides at the absolute value of the level. Positive values from 1 to
5298     * 62 indicate embeddings. Where values are zero or not defined, the base
5299     * embedding level as determined by the base direction is assumed.<p>
5300     *
5301     * The NUMERIC_SHAPING attribute in the text, if present, converts European
5302     * digits to other decimal digits before running the bidi algorithm. This
5303     * attribute, if present, must be applied to all the text in the paragraph.<p>
5304     *
5305     * Note: this constructor calls setPara() internally.
5306     *
5307     * @param paragraph a paragraph of text with optional character and
5308     *        paragraph attribute information
5309     * @stable ICU 3.8
5310     */
5311    public Bidi(AttributedCharacterIterator paragraph)
5312    {
5313        this();
5314        setPara(paragraph);
5315    }
5316
5317    /**
5318     * Create Bidi from the given text, embedding, and direction information.
5319     *
5320     * <p>The embeddings array may be null. If present, the values represent
5321     * embedding level information.
5322     * Negative values from -1 to -{@link #MAX_EXPLICIT_LEVEL}
5323     * indicate overrides at the absolute value of the level.
5324     * Positive values from 1 to {@link #MAX_EXPLICIT_LEVEL} indicate embeddings.
5325     * Where values are zero, the base embedding level
5326     * as determined by the base direction is assumed,
5327     * except for paragraph separators which remain at 0 to prevent reordering of paragraphs.</p>
5328     *
5329     * <p>Note: This constructor calls setPara() internally,
5330     * after converting the java.text.Bidi-style embeddings with negative overrides
5331     * into ICU-style embeddings with bit fields for {@link #LEVEL_OVERRIDE} and the level.
5332     *
5333     * @param text an array containing the paragraph of text to process.
5334     * @param textStart the index into the text array of the start of the
5335     *        paragraph.
5336     * @param embeddings an array containing embedding values for each character
5337     *        in the paragraph. This can be null, in which case it is assumed
5338     *        that there is no external embedding information.
5339     * @param embStart the index into the embedding array of the start of the
5340     *        paragraph.
5341     * @param paragraphLength the length of the paragraph in the text and
5342     *        embeddings arrays.
5343     * @param flags a collection of flags that control the algorithm. The
5344     *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
5345     *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
5346     *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
5347     *
5348     * @throws IllegalArgumentException if the values in embeddings are
5349     *         not within the allowed range
5350     *
5351     * @see #DIRECTION_LEFT_TO_RIGHT
5352     * @see #DIRECTION_RIGHT_TO_LEFT
5353     * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
5354     * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
5355     * @stable ICU 3.8
5356     */
5357    public Bidi(char[] text,
5358            int textStart,
5359            byte[] embeddings,
5360            int embStart,
5361            int paragraphLength,
5362            int flags)
5363    {
5364        this();
5365        byte paraLvl;
5366        switch (flags) {
5367        case DIRECTION_LEFT_TO_RIGHT:
5368        default:
5369            paraLvl = LTR;
5370            break;
5371        case DIRECTION_RIGHT_TO_LEFT:
5372            paraLvl = RTL;
5373            break;
5374        case DIRECTION_DEFAULT_LEFT_TO_RIGHT:
5375            paraLvl = LEVEL_DEFAULT_LTR;
5376            break;
5377        case DIRECTION_DEFAULT_RIGHT_TO_LEFT:
5378            paraLvl = LEVEL_DEFAULT_RTL;
5379            break;
5380        }
5381        byte[] paraEmbeddings;
5382        if (embeddings == null) {
5383            paraEmbeddings = null;
5384        } else {
5385            // Convert from java.text.Bidi embeddings to ICU setPara() levels:
5386            // Copy to the start of a new array and convert java.text negative overrides
5387            // to ICU bit-field-and-mask overrides.
5388            // A copy of the embeddings is always required because
5389            // setPara() may modify its embeddings.
5390            paraEmbeddings = new byte[paragraphLength];
5391            byte lev;
5392            for (int i = 0; i < paragraphLength; i++) {
5393                lev = embeddings[i + embStart];
5394                if (lev < 0) {
5395                    lev = (byte)((- lev) | LEVEL_OVERRIDE);
5396                }
5397                // setPara() lifts level 0 up to the resolved paragraph level.
5398                paraEmbeddings[i] = lev;
5399            }
5400        }
5401        if (textStart == 0 && paragraphLength == text.length) {
5402            setPara(text, paraLvl, paraEmbeddings);
5403        } else {
5404            char[] paraText = new char[paragraphLength];
5405            System.arraycopy(text, textStart, paraText, 0, paragraphLength);
5406            setPara(paraText, paraLvl, paraEmbeddings);
5407        }
5408    }
5409
5410    /**
5411     * Create a Bidi object representing the bidi information on a line of text
5412     * within the paragraph represented by the current Bidi. This call is not
5413     * required if the entire paragraph fits on one line.
5414     *
5415     * @param lineStart the offset from the start of the paragraph to the start
5416     *        of the line.
5417     * @param lineLimit the offset from the start of the paragraph to the limit
5418     *        of the line.
5419     *
5420     * @throws IllegalStateException if this call is not preceded by a successful
5421     *         call to <code>setPara</code>
5422     * @throws IllegalArgumentException if lineStart and lineLimit are not in the range
5423     *         <code>0&lt;=lineStart&lt;lineLimit&lt;=getProcessedLength()</code>,
5424     *         or if the specified line crosses a paragraph boundary
5425     * @stable ICU 3.8
5426     */
5427    public Bidi createLineBidi(int lineStart, int lineLimit)
5428    {
5429        return setLine(lineStart, lineLimit);
5430    }
5431
5432    /**
5433     * Return true if the line is not left-to-right or right-to-left. This means
5434     * it either has mixed runs of left-to-right and right-to-left text, or the
5435     * base direction differs from the direction of the only run of text.
5436     *
5437     * @return true if the line is not left-to-right or right-to-left.
5438     *
5439     * @throws IllegalStateException if this call is not preceded by a successful
5440     *         call to <code>setPara</code>
5441     * @stable ICU 3.8
5442     */
5443    public boolean isMixed()
5444    {
5445        return (!isLeftToRight() && !isRightToLeft());
5446    }
5447
5448    /**
5449     * Return true if the line is all left-to-right text and the base direction
5450     * is left-to-right.
5451     *
5452     * @return true if the line is all left-to-right text and the base direction
5453     *         is left-to-right.
5454     *
5455     * @throws IllegalStateException if this call is not preceded by a successful
5456     *         call to <code>setPara</code>
5457     * @stable ICU 3.8
5458     */
5459    public boolean isLeftToRight()
5460    {
5461        return (getDirection() == LTR && (paraLevel & 1) == 0);
5462    }
5463
5464    /**
5465     * Return true if the line is all right-to-left text, and the base direction
5466     * is right-to-left
5467     *
5468     * @return true if the line is all right-to-left text, and the base
5469     *         direction is right-to-left
5470     *
5471     * @throws IllegalStateException if this call is not preceded by a successful
5472     *         call to <code>setPara</code>
5473     * @stable ICU 3.8
5474     */
5475    public boolean isRightToLeft()
5476    {
5477        return (getDirection() == RTL && (paraLevel & 1) == 1);
5478    }
5479
5480    /**
5481     * Return true if the base direction is left-to-right
5482     *
5483     * @return true if the base direction is left-to-right
5484     *
5485     * @throws IllegalStateException if this call is not preceded by a successful
5486     *         call to <code>setPara</code> or <code>setLine</code>
5487     *
5488     * @stable ICU 3.8
5489     */
5490    public boolean baseIsLeftToRight()
5491    {
5492        return (getParaLevel() == LTR);
5493    }
5494
5495    /**
5496     * Return the base level (0 if left-to-right, 1 if right-to-left).
5497     *
5498     * @return the base level
5499     *
5500     * @throws IllegalStateException if this call is not preceded by a successful
5501     *         call to <code>setPara</code> or <code>setLine</code>
5502     *
5503     * @stable ICU 3.8
5504     */
5505    public int getBaseLevel()
5506    {
5507        return getParaLevel();
5508    }
5509
5510    /**
5511     * Return the number of level runs.
5512     *
5513     * @return the number of level runs
5514     *
5515     * @throws IllegalStateException if this call is not preceded by a successful
5516     *         call to <code>setPara</code> or <code>setLine</code>
5517     *
5518     * @stable ICU 3.8
5519     */
5520    public int getRunCount()
5521    {
5522        return countRuns();
5523    }
5524
5525    /**
5526     * Compute the logical to visual run mapping
5527     */
5528     void getLogicalToVisualRunsMap()
5529     {
5530        if (isGoodLogicalToVisualRunsMap) {
5531            return;
5532        }
5533        int count = countRuns();
5534        if ((logicalToVisualRunsMap == null) ||
5535            (logicalToVisualRunsMap.length < count)) {
5536            logicalToVisualRunsMap = new int[count];
5537        }
5538        int i;
5539        long[] keys = new long[count];
5540        for (i = 0; i < count; i++) {
5541            keys[i] = ((long)(runs[i].start)<<32) + i;
5542        }
5543        Arrays.sort(keys);
5544        for (i = 0; i < count; i++) {
5545            logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF);
5546        }
5547        isGoodLogicalToVisualRunsMap = true;
5548     }
5549
5550    /**
5551     * Return the level of the nth logical run in this line.
5552     *
5553     * @param run the index of the run, between 0 and <code>countRuns()-1</code>
5554     *
5555     * @return the level of the run
5556     *
5557     * @throws IllegalStateException if this call is not preceded by a successful
5558     *         call to <code>setPara</code> or <code>setLine</code>
5559     * @throws IllegalArgumentException if <code>run</code> is not in
5560     *         the range <code>0&lt;=run&lt;countRuns()</code>
5561     * @stable ICU 3.8
5562     */
5563    public int getRunLevel(int run)
5564    {
5565        verifyValidParaOrLine();
5566        BidiLine.getRuns(this);
5567        verifyRange(run, 0, runCount);
5568        getLogicalToVisualRunsMap();
5569        return runs[logicalToVisualRunsMap[run]].level;
5570    }
5571
5572    /**
5573     * Return the index of the character at the start of the nth logical run in
5574     * this line, as an offset from the start of the line.
5575     *
5576     * @param run the index of the run, between 0 and <code>countRuns()</code>
5577     *
5578     * @return the start of the run
5579     *
5580     * @throws IllegalStateException if this call is not preceded by a successful
5581     *         call to <code>setPara</code> or <code>setLine</code>
5582     * @throws IllegalArgumentException if <code>run</code> is not in
5583     *         the range <code>0&lt;=run&lt;countRuns()</code>
5584     * @stable ICU 3.8
5585     */
5586    public int getRunStart(int run)
5587    {
5588        verifyValidParaOrLine();
5589        BidiLine.getRuns(this);
5590        verifyRange(run, 0, runCount);
5591        getLogicalToVisualRunsMap();
5592        return runs[logicalToVisualRunsMap[run]].start;
5593    }
5594
5595    /**
5596     * Return the index of the character past the end of the nth logical run in
5597     * this line, as an offset from the start of the line. For example, this
5598     * will return the length of the line for the last run on the line.
5599     *
5600     * @param run the index of the run, between 0 and <code>countRuns()</code>
5601     *
5602     * @return the limit of the run
5603     *
5604     * @throws IllegalStateException if this call is not preceded by a successful
5605     *         call to <code>setPara</code> or <code>setLine</code>
5606     * @throws IllegalArgumentException if <code>run</code> is not in
5607     *         the range <code>0&lt;=run&lt;countRuns()</code>
5608     * @stable ICU 3.8
5609     */
5610    public int getRunLimit(int run)
5611    {
5612        verifyValidParaOrLine();
5613        BidiLine.getRuns(this);
5614        verifyRange(run, 0, runCount);
5615        getLogicalToVisualRunsMap();
5616        int idx = logicalToVisualRunsMap[run];
5617        int len = idx == 0 ? runs[idx].limit :
5618                                runs[idx].limit - runs[idx-1].limit;
5619        return runs[idx].start + len;
5620    }
5621
5622    /**
5623     * Return true if the specified text requires bidi analysis. If this returns
5624     * false, the text will display left-to-right. Clients can then avoid
5625     * constructing a Bidi object. Text in the Arabic Presentation Forms area of
5626     * Unicode is presumed to already be shaped and ordered for display, and so
5627     * will not cause this method to return true.
5628     *
5629     * @param text the text containing the characters to test
5630     * @param start the start of the range of characters to test
5631     * @param limit the limit of the range of characters to test
5632     *
5633     * @return true if the range of characters requires bidi analysis
5634     *
5635     * @stable ICU 3.8
5636     */
5637    public static boolean requiresBidi(char[] text,
5638            int start,
5639            int limit)
5640    {
5641        final int RTLMask = (1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT |
5642                1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC |
5643                1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING |
5644                1 << UCharacter.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE |
5645                1 << UCharacter.DIRECTIONALITY_ARABIC_NUMBER);
5646
5647        for (int i = start; i < limit; ++i) {
5648            if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {
5649                return true;
5650            }
5651        }
5652        return false;
5653    }
5654
5655    /**
5656     * Reorder the objects in the array into visual order based on their levels.
5657     * This is a utility method to use when you have a collection of objects
5658     * representing runs of text in logical order, each run containing text at a
5659     * single level. The elements at <code>index</code> from
5660     * <code>objectStart</code> up to <code>objectStart + count</code> in the
5661     * objects array will be reordered into visual order assuming
5662     * each run of text has the level indicated by the corresponding element in
5663     * the levels array (at <code>index - objectStart + levelStart</code>).
5664     *
5665     * @param levels an array representing the bidi level of each object
5666     * @param levelStart the start position in the levels array
5667     * @param objects the array of objects to be reordered into visual order
5668     * @param objectStart the start position in the objects array
5669     * @param count the number of objects to reorder
5670     * @stable ICU 3.8
5671     */
5672    public static void reorderVisually(byte[] levels,
5673            int levelStart,
5674            Object[] objects,
5675            int objectStart,
5676            int count)
5677    {
5678        byte[] reorderLevels = new byte[count];
5679        System.arraycopy(levels, levelStart, reorderLevels, 0, count);
5680        int[] indexMap = reorderVisual(reorderLevels);
5681        Object[] temp = new Object[count];
5682        System.arraycopy(objects, objectStart, temp, 0, count);
5683        for (int i = 0; i < count; ++i) {
5684            objects[objectStart + i] = temp[indexMap[i]];
5685        }
5686    }
5687
5688    /**
5689     * Take a <code>Bidi</code> object containing the reordering
5690     * information for a piece of text (one or more paragraphs) set by
5691     * <code>setPara()</code> or for a line of text set by <code>setLine()</code>
5692     * and return a string containing the reordered text.
5693     *
5694     * <p>The text may have been aliased (only a reference was stored
5695     * without copying the contents), thus it must not have been modified
5696     * since the <code>setPara()</code> call.
5697     *
5698     * This method preserves the integrity of characters with multiple
5699     * code units and (optionally) combining characters.
5700     * Characters in RTL runs can be replaced by mirror-image characters
5701     * in the returned string. Note that "real" mirroring has to be done in a
5702     * rendering engine by glyph selection and that for many "mirrored"
5703     * characters there are no Unicode characters as mirror-image equivalents.
5704     * There are also options to insert or remove Bidi control
5705     * characters; see the descriptions of the return value and the
5706     * <code>options</code> parameter, and of the option bit flags.
5707     *
5708     * @param options A bit set of options for the reordering that control
5709     *                how the reordered text is written.
5710     *                The options include mirroring the characters on a code
5711     *                point basis and inserting LRM characters, which is used
5712     *                especially for transforming visually stored text
5713     *                to logically stored text (although this is still an
5714     *                imperfect implementation of an "inverse Bidi" algorithm
5715     *                because it uses the "forward Bidi" algorithm at its core).
5716     *                The available options are:
5717     *                <code>DO_MIRRORING</code>,
5718     *                <code>INSERT_LRM_FOR_NUMERIC</code>,
5719     *                <code>KEEP_BASE_COMBINING</code>,
5720     *                <code>OUTPUT_REVERSE</code>,
5721     *                <code>REMOVE_BIDI_CONTROLS</code>,
5722     *                <code>STREAMING</code>
5723     *
5724     * @return The reordered text.
5725     *         If the <code>INSERT_LRM_FOR_NUMERIC</code> option is set, then
5726     *         the length of the returned string could be as large as
5727     *         <code>getLength()+2*countRuns()</code>.<br>
5728     *         If the <code>REMOVE_BIDI_CONTROLS</code> option is set, then the
5729     *         length of the returned string may be less than
5730     *         <code>getLength()</code>.<br>
5731     *         If none of these options is set, then the length of the returned
5732     *         string will be exactly <code>getProcessedLength()</code>.
5733     *
5734     * @throws IllegalStateException if this call is not preceded by a successful
5735     *         call to <code>setPara</code> or <code>setLine</code>
5736     *
5737     * @see #DO_MIRRORING
5738     * @see #INSERT_LRM_FOR_NUMERIC
5739     * @see #KEEP_BASE_COMBINING
5740     * @see #OUTPUT_REVERSE
5741     * @see #REMOVE_BIDI_CONTROLS
5742     * @see #OPTION_STREAMING
5743     * @see #getProcessedLength
5744     * @stable ICU 3.8
5745     */
5746    public String writeReordered(int options)
5747    {
5748        verifyValidParaOrLine();
5749        if (length == 0) {
5750            /* nothing to do */
5751            return "";
5752        }
5753        return BidiWriter.writeReordered(this, options);
5754    }
5755
5756    /**
5757     * Reverse a Right-To-Left run of Unicode text.
5758     *
5759     * This method preserves the integrity of characters with multiple
5760     * code units and (optionally) combining characters.
5761     * Characters can be replaced by mirror-image characters
5762     * in the destination buffer. Note that "real" mirroring has
5763     * to be done in a rendering engine by glyph selection
5764     * and that for many "mirrored" characters there are no
5765     * Unicode characters as mirror-image equivalents.
5766     * There are also options to insert or remove Bidi control
5767     * characters.
5768     *
5769     * This method is the implementation for reversing RTL runs as part
5770     * of <code>writeReordered()</code>. For detailed descriptions
5771     * of the parameters, see there.
5772     * Since no Bidi controls are inserted here, the output string length
5773     * will never exceed <code>src.length()</code>.
5774     *
5775     * @see #writeReordered
5776     *
5777     * @param src The RTL run text.
5778     *
5779     * @param options A bit set of options for the reordering that control
5780     *                how the reordered text is written.
5781     *                See the <code>options</code> parameter in <code>writeReordered()</code>.
5782     *
5783     * @return The reordered text.
5784     *         If the <code>REMOVE_BIDI_CONTROLS</code> option
5785     *         is set, then the length of the returned string may be less than
5786     *         <code>src.length()</code>. If this option is not set,
5787     *         then the length of the returned string will be exactly
5788     *         <code>src.length()</code>.
5789     *
5790     * @throws IllegalArgumentException if <code>src</code> is null.
5791     * @stable ICU 3.8
5792     */
5793    public static String writeReverse(String src, int options)
5794    {
5795        /* error checking */
5796        if (src == null) {
5797            throw new IllegalArgumentException();
5798        }
5799
5800        if (src.length() > 0) {
5801            return BidiWriter.writeReverse(src, options);
5802        } else {
5803            /* nothing to do */
5804            return "";
5805        }
5806    }
5807
5808}
5809